From 475463b1843767b50e26aeb3bb70ddbee9ec85ee Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Tue, 27 Aug 2024 17:24:00 +0200
Subject: [PATCH 01/50] [helas] in gg_tt.mad, proof of concept for removing
 template/inline FFVs and for compiling them as separate object files (related
 to splitting kernels)

---
 .../SubProcesses/P1_gg_ttx/CPPProcess.cc      | 107 ++++++++++++++++--
 1 file changed, 99 insertions(+), 8 deletions(-)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
index 45fb7860e9..bc8b342eb2 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
@@ -177,6 +177,91 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
+#ifdef MGONGPUCPP_GPUIMPL
+  namespace mg5amcGpu
+#else
+  namespace mg5amcCpu
+#endif
+  {
+#ifdef MGONGPUCPP_GPUIMPL
+    using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+    using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+    using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+    using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+    using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+    using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+    using NUM_ACCESS = DeviceAccessNumerators;    // non-trivial access: buffer includes all events
+    using DEN_ACCESS = DeviceAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#else
+    using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+    using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+    using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+    using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+    using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+    using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+    using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+    using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif /* clang-format on */
+    
+    // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+    __device__ void
+    helas_VVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+    {
+      return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+    }
+
+    // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+    __device__ void
+    helas_FFV1_0( const fptype allF1[],
+                  const fptype allF2[],
+                  const fptype allV3[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+    {
+      return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    }
+
+    // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+    __device__ void
+    helas_FFV1_1( const fptype allF2[],
+                  const fptype allV3[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  const fptype M1,
+                  const fptype W1,
+                  fptype allF1[] )
+    {
+      return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+    }
+
+    // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+    __device__ void
+    helas_FFV1_2( const fptype allF1[],
+                  const fptype allV3[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  const fptype M2,
+                  const fptype W2,
+                  fptype allF2[] )
+    {
+      return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+    }
+    
+  }
+  
+  //--------------------------------------------------------------------------
+
   // Evaluate |M|^2 for each subprocess
   // NB: calculate_wavefunctions ADDS |M|^2 for a given ihel to the running sum of |M|^2 over helicities for the given event(s)
   // (similarly, it also ADDS the numerator and denominator for a given ihel to their running sums over helicities)
@@ -204,7 +289,7 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-    using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+    //using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +301,7 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-    using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+    //using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,10 +413,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      //VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      //FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -342,10 +429,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      //FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      //FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -355,10 +444,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      //FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      //FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );

From 6b0ba37f31dd210f4341af464d28f0791ea62408 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 10:35:50 +0200
Subject: [PATCH 02/50] [helas] in gg_tt.mad and CODEGEN, add comments in
 MemoryAccessGs.h and MemoryAccessMomenta.h

---
 .../madgraph/iolibs/template_files/gpu/MemoryAccessGs.h         | 2 +-
 .../madgraph/iolibs/template_files/gpu/MemoryAccessMomenta.h    | 2 +-
 epochX/cudacpp/gg_tt.mad/SubProcesses/MemoryAccessGs.h          | 2 +-
 epochX/cudacpp/gg_tt.mad/SubProcesses/MemoryAccessMomenta.h     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryAccessGs.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryAccessGs.h
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryAccessMomenta.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 

From 5d262448133e06dbb3cf4f0b0c2eb6f74c89715c Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 10:36:30 +0200
Subject: [PATCH 03/50] [helas] in gg_tt.mad, compile HelAmps.o as a separate
 object file in the P subdirectory (depends on npar) - build succeeds for cpp,
 fails for cuda (ptxas 'Unresolved extern function' when compiling
 CPPProcess.cc)

ccache /usr/local/cuda-12.0/bin/nvcc  -I. -I../../src  -Xcompiler -O3 -gencode arch=compute_70,code=compute_70 -gencode arch=compute_70,code=sm_70 -lineinfo -use_fast_math -I/usr/local/cuda-12.0/include/ -DUSE_NVTX  -std=c++17  -ccbin /usr/lib64/ccache/g++ -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE -Xcompiler -fPIC -c -x cu CPPProcess.cc -o CPPProcess_cuda.o
ptxas fatal   : Unresolved extern function '_ZN9mg5amcGpu14helas_VVV1P0_1EPKdS1_S1_dddPd'
---
 .../SubProcesses/P1_gg_ttx/CPPProcess.cc      |  85 ------------
 .../SubProcesses/P1_gg_ttx/HelAmps.cc         | 121 ++++++++++++++++++
 .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk |   3 +
 epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h     |  47 +++++++
 4 files changed, 171 insertions(+), 85 deletions(-)
 create mode 100644 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
index bc8b342eb2..9b3f055223 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
@@ -177,91 +177,6 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-#ifdef MGONGPUCPP_GPUIMPL
-  namespace mg5amcGpu
-#else
-  namespace mg5amcCpu
-#endif
-  {
-#ifdef MGONGPUCPP_GPUIMPL
-    using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
-    using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
-    using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-    using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-    using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
-    using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
-#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
-    using NUM_ACCESS = DeviceAccessNumerators;    // non-trivial access: buffer includes all events
-    using DEN_ACCESS = DeviceAccessDenominators;  // non-trivial access: buffer includes all events
-#endif
-#else
-    using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
-    using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
-    using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-    using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-    using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
-    using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
-#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
-    using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
-    using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
-#endif
-#endif /* clang-format on */
-    
-    // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
-    __device__ void
-    helas_VVV1P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
-    {
-      return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
-    }
-
-    // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
-    __device__ void
-    helas_FFV1_0( const fptype allF1[],
-                  const fptype allF2[],
-                  const fptype allV3[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
-    {
-      return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
-    }
-
-    // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
-    __device__ void
-    helas_FFV1_1( const fptype allF2[],
-                  const fptype allV3[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  const fptype M1,
-                  const fptype W1,
-                  fptype allF1[] )
-    {
-      return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
-    }
-
-    // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
-    __device__ void
-    helas_FFV1_2( const fptype allF1[],
-                  const fptype allV3[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  const fptype M2,
-                  const fptype W2,
-                  fptype allF2[] )
-    {
-      return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
-    }
-    
-  }
-  
-  //--------------------------------------------------------------------------
-
   // Evaluate |M|^2 for each subprocess
   // NB: calculate_wavefunctions ADDS |M|^2 for a given ihel to the running sum of |M|^2 over helicities for the given event(s)
   // (similarly, it also ADDS the numerator and denominator for a given ihel to their running sums over helicities)
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
new file mode 100644
index 0000000000..a473ccb568
--- /dev/null
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
@@ -0,0 +1,121 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// Helicity amplitudes for calculating the matrix elements for
+// Process: g g > t t~ WEIGHTED<=2 @1
+// -----------------------------------------------------------------------------
+// *** NB: this implementation class depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this class is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+//#include "coloramps.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  helas_VVV1P0_1( const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  const fptype M1,
+                  const fptype W1,
+                  fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  helas_FFV1_0( const fptype allF1[],
+                const fptype allF2[],
+                const fptype allV3[],
+                const fptype allCOUP[],
+                const double Ccoeff,
+                fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  helas_FFV1_1( const fptype allF2[],
+                const fptype allV3[],
+                const fptype allCOUP[],
+                const double Ccoeff,
+                const fptype M1,
+                const fptype W1,
+                fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  helas_FFV1_2( const fptype allF1[],
+                const fptype allV3[],
+                const fptype allCOUP[],
+                const double Ccoeff,
+                const fptype M2,
+                const fptype W2,
+                fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+}
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..458a6599da 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -776,6 +776,9 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
index 1c2d0cd26a..4d07510327 100644
--- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
@@ -1032,8 +1032,55 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  helas_VVV1P0_1( const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  const fptype M1,
+                  const fptype W1,
+                  fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  helas_FFV1_0( const fptype allF1[],
+                const fptype allF2[],
+                const fptype allV3[],
+                const fptype allCOUP[],
+                const double Ccoeff,
+                fptype allvertexes[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  helas_FFV1_1( const fptype allF2[],
+                const fptype allV3[],
+                const fptype allCOUP[],
+                const double Ccoeff,
+                const fptype M1,
+                const fptype W1,
+                fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  helas_FFV1_2( const fptype allF1[],
+                const fptype allV3[],
+                const fptype allCOUP[],
+                const double Ccoeff,
+                const fptype M2,
+                const fptype W2,
+                fptype allF2[] );
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H

From 24c4fee74333b010057484c088c6dc7fa69c8625 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 10:54:06 +0200
Subject: [PATCH 04/50] [helas] in gg_tt.mad, add RDC (nvcc '-rdc true') to
 ensure that cuda builds succeed and cuda tests succeed

However, the build issues some nvlink warnings:
nvlink warning : SM Arch ('sm_52') not found in './CPPProcess_cuda.o'
nvlink warning : SM Arch ('sm_52') not found in './HelAmps_cuda.o'
nvlink warning : SM Arch ('sm_52') not found in './CPPProcess_cuda.o'
nvlink warning : SM Arch ('sm_52') not found in './HelAmps_cuda.o'
---
 epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
index 458a6599da..7f28252b1e 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -778,6 +778,8 @@ endif
 
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o

From 7aef7e21d9bc53467062222acd19b0f902ad6c7a Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 11:02:09 +0200
Subject: [PATCH 05/50] [helas] in gg_tt.mad, avoid nvlink 'SM Arch not found'
 warnings when using RDC (pass GPUARCHFLAGS at link time)

---
 epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
index 7f28252b1e..b339d6cfe5 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -791,12 +791,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -967,6 +967,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)

From 77d157cc06a575fa13903de65e2bc1160aafa26f Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 11:30:04 +0200
Subject: [PATCH 06/50] [helas] in gg_tt.mad, clean up 'linked HelAmps'
 implementation: add option HELINL=L and '#ifdef MGONGPU_LINKER_HELAMPS'

---
 .../SubProcesses/P1_gg_ttx/CPPProcess.cc      | 26 +++++---
 .../SubProcesses/P1_gg_ttx/HelAmps.cc         | 58 +++++++++--------
 .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk |  5 +-
 epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h     | 62 +++++++++++--------
 .../cudacpp/gg_tt.mad/src/cudacpp_config.mk   |  2 +-
 epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h  | 16 ++++-
 6 files changed, 103 insertions(+), 66 deletions(-)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
index 9b3f055223..71c85c86e3 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-    //using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
+    using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-    //using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
+    using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,11 +332,21 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      //VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+#ifdef MGONGPU_LINKER_HELAMPS
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#else
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#endif
+
       helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      //FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
@@ -344,11 +358,9 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      //FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
       helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      //FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
@@ -359,11 +371,9 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      //FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
       helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      //FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
index a473ccb568..dbda544a11 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
@@ -3,6 +3,8 @@
 // Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
 // Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
 
+#ifdef MGONGPU_LINKER_HELAMPS
+
 #include "HelAmps_sm.h"
 
 // -----------------------------------------------------------------------------
@@ -67,51 +69,51 @@ namespace mg5amcCpu
 
   // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
   __device__ void
-  helas_VVV1P0_1( const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  const fptype M1,
-                  const fptype W1,
-                  fptype allV1[] )
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
   {
     return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
   __device__ void
-  helas_FFV1_0( const fptype allF1[],
-                const fptype allF2[],
-                const fptype allV3[],
-                const fptype allCOUP[],
-                const double Ccoeff,
-                fptype allvertexes[] )
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
   __device__ void
-  helas_FFV1_1( const fptype allF2[],
-                const fptype allV3[],
-                const fptype allCOUP[],
-                const double Ccoeff,
-                const fptype M1,
-                const fptype W1,
-                fptype allF1[] )
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
   __device__ void
-  helas_FFV1_2( const fptype allF1[],
-                const fptype allV3[],
-                const fptype allCOUP[],
-                const double Ccoeff,
-                const fptype M2,
-                const fptype W2,
-                fptype allF2[] )
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
@@ -119,3 +121,5 @@ namespace mg5amcCpu
   //--------------------------------------------------------------------------
 
 }
+
+#endif
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
index b339d6cfe5..f6e21ac493 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
index 4d07510327..9a0bbb66ef 100644
--- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
@@ -1034,51 +1034,59 @@ namespace mg5amcCpu
 
   //==========================================================================
 
+#ifdef MGONGPU_LINKER_HELAMPS
+
+  //--------------------------------------------------------------------------
+
   // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
   __device__ void
-  helas_VVV1P0_1( const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  const fptype M1,
-                  const fptype W1,
-                  fptype allV1[] );
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
   // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
   __device__ void
-  helas_FFV1_0( const fptype allF1[],
-                const fptype allF2[],
-                const fptype allV3[],
-                const fptype allCOUP[],
-                const double Ccoeff,
-                fptype allvertexes[] );
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
   // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
   __device__ void
-  helas_FFV1_1( const fptype allF2[],
-                const fptype allV3[],
-                const fptype allCOUP[],
-                const double Ccoeff,
-                const fptype M1,
-                const fptype W1,
-                fptype allF1[] );
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
   // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
   __device__ void
-  helas_FFV1_2( const fptype allF1[],
-                const fptype allV3[],
-                const fptype allCOUP[],
-                const double Ccoeff,
-                const fptype M2,
-                const fptype W2,
-                fptype allF2[] );
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
 
+  //--------------------------------------------------------------------------
+
+#endif
+  
   //==========================================================================
 
 } // end namespace
diff --git a/epochX/cudacpp/gg_tt.mad/src/cudacpp_config.mk b/epochX/cudacpp/gg_tt.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gg_tt.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gg_tt.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps): MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {

From f105b9ce8c3a8e23bc3a52b84ae9f20bc1e5e216 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 11:58:31 +0200
Subject: [PATCH 07/50] [helas] in tput/teeThroughputX.sh, print out the
 preliminary build time on each log

---
 epochX/cudacpp/tput/teeThroughputX.sh | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/epochX/cudacpp/tput/teeThroughputX.sh b/epochX/cudacpp/tput/teeThroughputX.sh
index f1b6fde306..ccdcb2a9a3 100755
--- a/epochX/cudacpp/tput/teeThroughputX.sh
+++ b/epochX/cudacpp/tput/teeThroughputX.sh
@@ -165,6 +165,9 @@ for step in $steps; do
         for helinl in $helinls; do
           inl=; if [ "${helinl}" == "1" ]; then inl=" -inlonly"; fi
           for hrdcod in $hrdcods; do
+            logfile=logs_${proc#-}_${sufflog}/log_${proc#-}_${sufflog}_${fptype}_inl${helinl}_hrd${hrdcod}.txt
+            if [ "${rndgen}" != "" ]; then logfile=${logfile%.txt}_${rndgen#-}.txt; fi
+            if [ "${rmbsmp}" != "" ]; then logfile=${logfile%.txt}_${rmbsmp#-}.txt; fi
             hrd=; if [ "${hrdcod}" == "1" ]; then hrd=" -hrdonly"; fi
             args="${proc}${sa}${flt}${inl}${hrd} ${dlp}"
             args="${args} ${alpaka}" # optionally disable alpaka tests
@@ -181,16 +184,19 @@ for step in $steps; do
               printf "\n%80s\n" |tr " " "*"
               printf "*** ./throughputX.sh -makeonly ${makej} $args"
               printf "\n%80s\n" |tr " " "*"
+              SECONDS=0 # bash built-in
               if ! ./throughputX.sh -makeonly ${makej} $args; then exit 1; fi
+              BUILDTIME=$(date -d@$SECONDS -u "+$(($SECONDS/86400))d %Hh %Mm %Ss")
+              mkdir -p $(dirname $logfile); echo "" | tee $logfile # create the log directory before the first write (the non-makeonly branch does the same before its tee)
+              echo "------------------------------------------------" | tee -a $logfile 
+              echo "Preliminary build completed in $BUILDTIME" | tee -a $logfile 
+              echo "------------------------------------------------" | tee -a $logfile 
             else
-              logfile=logs_${proc#-}_${sufflog}/log_${proc#-}_${sufflog}_${fptype}_inl${helinl}_hrd${hrdcod}.txt
-              if [ "${rndgen}" != "" ]; then logfile=${logfile%.txt}_${rndgen#-}.txt; fi
-              if [ "${rmbsmp}" != "" ]; then logfile=${logfile%.txt}_${rmbsmp#-}.txt; fi
               printf "\n%80s\n" |tr " " "*"
-              printf "*** ./throughputX.sh $args | tee $logfile"
+              printf "*** ./throughputX.sh $args | tee -a $logfile"
               printf "\n%80s\n" |tr " " "*"
               mkdir -p $(dirname $logfile)
-              ./throughputX.sh $args -gtest | tee $logfile 
+              ./throughputX.sh $args -gtest | tee -a $logfile 
               if [ ${PIPESTATUS[0]} -ne "0" ]; then status=2; fi
             fi
           done

From 5f73fbbbec7ce988d5c530d9655d231e8dcb0c65 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 13:19:17 +0200
Subject: [PATCH 08/50] [helas] in tput throughputX.sh and teeThroughputX.sh,
 add the -inlL and -inlLonly options

---
 epochX/cudacpp/tput/teeThroughputX.sh | 25 +++++++++++++++++--------
 epochX/cudacpp/tput/throughputX.sh    | 19 +++++++++++++++----
 2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/epochX/cudacpp/tput/teeThroughputX.sh b/epochX/cudacpp/tput/teeThroughputX.sh
index ccdcb2a9a3..17d89e886f 100755
--- a/epochX/cudacpp/tput/teeThroughputX.sh
+++ b/epochX/cudacpp/tput/teeThroughputX.sh
@@ -10,7 +10,7 @@ cd $scrdir
 
 function usage()
 {
-  echo "Usage: $0 <processes [-eemumu][-ggtt][-ggttg][-ggttgg][-ggttggg][-gqttq][-heftggbb][-susyggtt][-susyggt1t1][-smeftggtttt]> [-sa] [-noalpaka] [-flt|-fltonly|-mix|-mixonly] [-inl|-inlonly] [-hrd|-hrdonly] [-common|-curhst] [-rmbhst|-bridge] [-makeonly] [-makeclean] [-makej] [-dlp <dyld_library_path>]" # -nofpe is no longer supported
+  echo "Usage: $0 <processes [-eemumu][-ggtt][-ggttg][-ggttgg][-ggttggg][-gqttq][-heftggbb][-susyggtt][-susyggt1t1][-smeftggtttt]> [-sa] [-noalpaka] [-flt|-fltonly|-mix|-mixonly] [-inl|-inlonly|-inlL|-inlLonly] [-hrd|-hrdonly] [-common|-curhst] [-rmbhst|-bridge] [-makeonly] [-makeclean] [-makej] [-dlp <dyld_library_path>]" # -nofpe is no longer supported
   exit 1
 }
 
@@ -28,7 +28,7 @@ smeftggtttt=
 suffs="mad" # DEFAULT code base: madevent + cudacpp as 2nd exporter (logs_*_mad)
 alpaka=
 fptypes="d"
-helinls="0"
+helinls="" # set default later
 hrdcods="0"
 rndgen=
 rmbsmp=
@@ -91,11 +91,17 @@ for arg in $*; do
     if [ "${fptypes}" != "d" ] && [ "${fptypes}" != "m" ]; then echo "ERROR! Options -flt, -fltonly, -mix and -mixonly are incompatible"; usage; fi
     fptypes="m"
   elif [ "$arg" == "-inl" ]; then
-    if [ "${helinls}" == "1" ]; then echo "ERROR! Options -inl and -inlonly are incompatible"; usage; fi
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
     helinls="0 1"
   elif [ "$arg" == "-inlonly" ]; then
-    if [ "${helinls}" == "0 1" ]; then echo "ERROR! Options -inl and -inlonly are incompatible"; usage; fi
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
     helinls="1"
+  elif [ "$arg" == "-inlL" ]; then
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
+    helinls="0 1 L"
+  elif [ "$arg" == "-inlLonly" ]; then
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
+    helinls="L"
   elif [ "$arg" == "-hrd" ]; then
     if [ "${hrdcods}" == "1" ]; then echo "ERROR! Options -hrd and -hrdonly are incompatible"; usage; fi
     hrdcods="0 1"
@@ -133,6 +139,9 @@ for arg in $*; do
   fi  
 done
 
+# Set defaults a posteriori
+if [ "${helinls}" == "" ]; then helinls="0"; fi
+
 # Workaround for MacOS SIP (SystemIntegrity Protection): set DYLD_LIBRARY_PATH In subprocesses
 if [ "${dlpset}" == "1" ]; then usage; fi
 
@@ -144,9 +153,9 @@ fi
 
 #echo "procs=$procs"
 #echo "suffs=$suffs"
-#echo "df=$df"
-#echo "inl=$inl"
-#echo "hrd=$hrd"
+#echo "fptypes=$fptypes"
+#echo "helinls=$helinls"
+#echo "hrdcods=$hrdcods"
 #echo "steps=$steps"
 ###exit 0
 
@@ -163,7 +172,7 @@ for step in $steps; do
       for fptype in $fptypes; do
         flt=; if [ "${fptype}" == "f" ]; then flt=" -fltonly"; elif [ "${fptype}" == "m" ]; then flt=" -mixonly"; fi
         for helinl in $helinls; do
-          inl=; if [ "${helinl}" == "1" ]; then inl=" -inlonly"; fi
+          inl=; if [ "${helinl}" == "1" ]; then inl=" -inlonly"; elif [ "${helinl}" == "L" ]; then inl=" -inlLonly"; fi
           for hrdcod in $hrdcods; do
             logfile=logs_${proc#-}_${sufflog}/log_${proc#-}_${sufflog}_${fptype}_inl${helinl}_hrd${hrdcod}.txt
             if [ "${rndgen}" != "" ]; then logfile=${logfile%.txt}_${rndgen#-}.txt; fi
diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh
index 267a6e17cf..28227006b0 100755
--- a/epochX/cudacpp/tput/throughputX.sh
+++ b/epochX/cudacpp/tput/throughputX.sh
@@ -17,7 +17,7 @@ export OMPFLAGS=
 
 function usage()
 {
-  echo "Usage: $0 <processes [-eemumu][-ggtt][-ggttg][-ggttgg][-ggttggg][-gqttq][-heftggbb][-susyggtt][-susyggt1t1][-smeftggtttt]> [-bldall][-cudaonly][-hiponly][-noneonly][-sse4only][-avx2only][-512yonly][-512zonly] [-sa] [-noalpaka] [-flt|-fltonly|-mix|-mixonly] [-inl|-inlonly] [-hrd|-hrdonly] [-common|-curhst] [-rmbhst|-bridge] [-omp] [-makeonly|-makeclean|-makecleanonly|-dryrun] [-makej] [-3a3b] [-div] [-req] [-detailed] [-gtest] [-v] [-dlp <dyld_library_path>]" # -nofpe is no longer supported
+  echo "Usage: $0 <processes [-eemumu][-ggtt][-ggttg][-ggttgg][-ggttggg][-gqttq][-heftggbb][-susyggtt][-susyggt1t1][-smeftggtttt]> [-bldall][-cudaonly][-hiponly][-noneonly][-sse4only][-avx2only][-512yonly][-512zonly] [-sa] [-noalpaka] [-flt|-fltonly|-mix|-mixonly] [-inl|-inlonly|-inlL|-inlLonly] [-hrd|-hrdonly] [-common|-curhst] [-rmbhst|-bridge] [-omp] [-makeonly|-makeclean|-makecleanonly|-dryrun] [-makej] [-3a3b] [-div] [-req] [-detailed] [-gtest] [-v] [-dlp <dyld_library_path>]" # -nofpe is no longer supported
   exit 1
 }
 
@@ -44,7 +44,7 @@ bblds=
 alpaka=1
 
 fptypes="d"
-helinls="0"
+helinls="" # set default later
 hrdcods="0"
 rndgen=""
 rmbsam=""
@@ -170,13 +170,21 @@ while [ "$1" != "" ]; do
     fptypes="m"
     shift
   elif [ "$1" == "-inl" ]; then
-    if [ "${helinls}" == "1" ]; then echo "ERROR! Options -inl and -inlonly are incompatible"; usage; fi
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
     helinls="0 1"
     shift
   elif [ "$1" == "-inlonly" ]; then
-    if [ "${helinls}" == "0 1" ]; then echo "ERROR! Options -inl and -inlonly are incompatible"; usage; fi
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
     helinls="1"
     shift
+  elif [ "$1" == "-inlL" ]; then
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
+    helinls="0 1 L"
+    shift
+  elif [ "$1" == "-inlLonly" ]; then
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
+    helinls="L"
+    shift
   elif [ "$1" == "-hrd" ]; then
     if [ "${hrdcods}" == "1" ]; then echo "ERROR! Options -hrd and -hrdonly are incompatible"; usage; fi
     hrdcods="0 1"
@@ -244,6 +252,9 @@ done
 ###echo procs=$procs
 ###exit 1
 
+# Set defaults a posteriori
+if [ "${helinls}" == "" ]; then helinls="0"; fi
+
 # Workaround for MacOS SIP (SystemIntegrity Protection): set DYLD_LIBRARY_PATH In subprocesses
 if [ "${dlp}" != "" ]; then
   echo "export DYLD_LIBRARY_PATH=$dlp"

From 8fe9ba460e4213dbd69d1318041bbbab4463fa06 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 13:34:42 +0200
Subject: [PATCH 09/50] [helas] in tput/allTees.sh, add 18 inlL tests

---
 epochX/cudacpp/tput/allTees.sh | 64 +++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 17 deletions(-)

diff --git a/epochX/cudacpp/tput/allTees.sh b/epochX/cudacpp/tput/allTees.sh
index 284e4f12a3..036f9a2e2f 100755
--- a/epochX/cudacpp/tput/allTees.sh
+++ b/epochX/cudacpp/tput/allTees.sh
@@ -15,6 +15,7 @@ makeclean=-makeclean
 ggttggg=-ggttggg
 rndhst=-curhst
 bsm=
+inlL=
 while [ "$1" != "" ]; do
   if [ "$1" == "-short" ]; then
     # Short (no ggttggg) or long version?
@@ -50,12 +51,23 @@ while [ "$1" != "" ]; do
   elif [ "$1" == "-nobsm" ] && [ "$bsm" != "-bsmonly" ]; then
     bsm=$1
     shift
+  elif [ "$1" == "-inlLonly" ] && [ "$inlL" != "-noinlL" ]; then
+    inlL=$1
+    shift
+  elif [ "$1" == "-noinlL" ] && [ "$inlL" != "-inlLonly" ]; then
+    inlL=$1
+    shift
   else
-    echo "Usage: $0 [-short] [-e] [-sa] [-makeonly] [-nomakeclean] [-hip] [-bsmonly|-nobsm]"
+    echo "Usage: $0 [-short] [-e] [-sa] [-makeonly] [-nomakeclean] [-hip] [-bsmonly|-nobsm] [-inlLonly|-noinlL]"
     exit 1
   fi
 done
 
+if [ "${bsm}" == "-bsmonly" ] && [ "${inlL}" == "-inlLonly" ]; then
+  echo "ERROR! Options -bsmonly and -inlLonly are incompatible"
+  exit 1
+fi
+
 # This is a script to launch in one go all tests for the (4 or) 5 main processes in this repository
 # It reproduces the logs in tput at the time of commit c0c276840654575d9fa0c3f3c4a0088e57764dbc
 # This is the commit just before the large alphas PR #434
@@ -63,11 +75,11 @@ done
 cd $scrdir/..
 started="STARTED  AT $(date)"
 
-# (36/102) Six logs (double/float/mixed x hrd0/hrd1 x inl0) in each of the six SM processes
+# (36/120) Six logs (double/float/mixed x hrd0/hrd1 x inl0) in each of the six SM processes
 \rm -rf gg_ttggg${suff}/lib/build.none_*
 cmd="./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq $ggttggg ${makeclean} ${opts}"
 tmp1=$(mktemp)
-if [ "${bsm}" != "-bsmonly" ]; then
+if [ "${bsm}" != "-bsmonly" ] && [ "${inlL}" != "-inlLonly" ]; then
   $cmd; status=$?
   ls -ltr ee_mumu${suff}/lib/build.none_*_inl0_hrd* gg_tt${suff}/lib/build.none_*_inl0_hrd* gg_tt*g${suff}/lib/build.none_*_inl0_hrd* | egrep -v '(total|\./|\.build|_common|^$)' > $tmp1
 else
@@ -75,12 +87,12 @@ else
 fi
 ended1="$cmd\nENDED(1) AT $(date) [Status=$status]"
 
-# (48/102) Four extra logs (double/float x hrd0/hrd1 x inl1) only in three of the six SM processes
+# (48/120) Four extra logs (double/float x hrd0/hrd1 x inl1) only in three of the six SM processes
 \rm -rf gg_ttg${suff}/lib/build.none_*
 \rm -rf gg_ttggg${suff}/lib/build.none_*
 cmd="./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly ${makeclean} ${opts}"
 tmp2=$(mktemp)
-if [ "${bsm}" != "-bsmonly" ]; then
+if [ "${bsm}" != "-bsmonly" ] && [ "${inlL}" != "-inlLonly" ]; then
   $cmd; status=$?
   ls -ltr ee_mumu${suff}/lib/build.none_*_inl1_hrd* gg_tt${suff}/lib/build.none_*_inl1_hrd* gg_tt*g${suff}/lib/build.none_*_inl1_hrd* | egrep -v '(total|\./|\.build|_common|^$)' > $tmp2
 else
@@ -88,53 +100,64 @@ else
 fi
 ended2="$cmd\nENDED(2) AT $(date) [Status=$status]"
 
-# (60/102) Two extra logs (double/float x hrd0 x inl0 + bridge) in all six SM processes (rebuild from cache)
+# (60/120) Two extra logs (double/float x hrd0 x inl0 + bridge) in all six SM processes (rebuild from cache)
 cmd="./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg $ggttggg -flt -bridge ${makeclean} ${opts}"
-if [ "${bsm}" != "-bsmonly" ]; then
+if [ "${bsm}" != "-bsmonly" ] && [ "${inlL}" != "-inlLonly" ]; then
   $cmd; status=$?
 else
   cmd="SKIP '$cmd'"; echo $cmd; status=$?
 fi
 ended3="$cmd\nENDED(3) AT $(date) [Status=$status]"
 
-# (66/102) Two extra logs (double/float x hrd0 x inl0 + rmbhst) only in three of the six SM processes (no rebuild needed)
+# (66/120) Two extra logs (double/float x hrd0 x inl0 + rmbhst) only in three of the six SM processes (no rebuild needed)
 cmd="./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ${opts}"
-if [ "${bsm}" != "-bsmonly" ]; then
+if [ "${bsm}" != "-bsmonly" ] && [ "${inlL}" != "-inlLonly" ]; then
   $cmd; status=$?
 else
   cmd="SKIP '$cmd'"; echo $cmd; status=$?
 fi
 ended4="$cmd\nENDED(4) AT $(date) [Status=$status]"
 
-# (72/102) Two extra logs (double/float x hrd0 x inl0 + rndhst) only in three of the six SM processes (no rebuild needed)
+# (72/120) Two extra logs (double/float x hrd0 x inl0 + rndhst) only in three of the six SM processes (no rebuild needed)
 cmd="./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt ${rndhst} ${opts}"
-if [ "${bsm}" != "-bsmonly" ] && [ "${rndhst}" != "-common" ]; then
+if [ "${bsm}" != "-bsmonly" ] && [ "${inlL}" != "-inlLonly" ] && [ "${rndhst}" != "-common" ]; then
   $cmd; status=$?
 else
   cmd="SKIP '$cmd'"; echo $cmd; status=$?
 fi
 ended5="$cmd\nENDED(5) AT $(date) [Status=$status]"
 
-# (78/102) Two extra logs (double/float x hrd0 x inl0 + common) only in three of the six SM processes (no rebuild needed)
+# (78/120) Two extra logs (double/float x hrd0 x inl0 + common) only in three of the six SM processes (no rebuild needed)
 cmd="./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common ${opts}"
-if [ "${bsm}" != "-bsmonly" ]; then
+if [ "${bsm}" != "-bsmonly" ] && [ "${inlL}" != "-inlLonly" ]; then
   $cmd; status=$?
 else
   cmd="SKIP '$cmd'"; echo $cmd; status=$?
 fi
 ended6="$cmd\nENDED(6) AT $(date) [Status=$status]"
 
-# (102/102) Six extra logs (double/float/mixed x hrd0/hrd1 x inl0) only in the four BSM processes
+# (102/120) Six extra logs (double/float/mixed x hrd0/hrd1 x inl0) only in the four BSM processes
 cmd="./tput/teeThroughputX.sh -mix -hrd -makej -susyggtt -susyggt1t1 -smeftggtttt -heftggbb ${makeclean} ${opts}"
-tmp3=$(mktemp)
-if [ "${bsm}" != "-nobsm" ]; then
+tmp7=$(mktemp)
+if [ "${bsm}" != "-nobsm" ] && [ "${inlL}" != "-inlLonly" ]; then
   $cmd; status=$?
-  ls -ltr susy_gg_tt${suff}/lib/build.none_*_inl0_hrd* susy_gg_t1t1${suff}/lib/build.none_*_inl0_hrd* smeft_gg_tttt${suff}/lib/build.none_*_inl0_hrd* heft_gg_bb${suff}/lib/build.none_*_inl0_hrd* | egrep -v '(total|\./|\.build|_common|^$)' > $tmp2
+  ls -ltr susy_gg_tt${suff}/lib/build.none_*_inl0_hrd* susy_gg_t1t1${suff}/lib/build.none_*_inl0_hrd* smeft_gg_tttt${suff}/lib/build.none_*_inl0_hrd* heft_gg_bb${suff}/lib/build.none_*_inl0_hrd* | egrep -v '(total|\./|\.build|_common|^$)' > $tmp7
 else
   cmd="SKIP '$cmd'"; echo $cmd; status=$?
 fi
 ended7="$cmd\nENDED(7) AT $(date) [Status=$status]"
 
+# (120/120) Three extra logs (double/float/mixed x inlL) only in the six SM processes
+cmd="./tput/teeThroughputX.sh -inlLonly -mix -makej -eemumu -ggtt -ggttg -ggttgg -gqttq $ggttggg ${makeclean} ${opts}"
+tmp8=$(mktemp)
+if [ "${bsm}" != "-bsmonly" ] && [ "${inlL}" != "-noinlL" ]; then
+  $cmd; status=$?
+  ls -ltr *${suff}/lib/build.none_*_inlL_hrd* | egrep -v '(total|\./|\.build|_common|^$)' > $tmp8
+else
+  cmd="SKIP '$cmd'"; echo $cmd; status=$?
+fi
+ended8="$cmd\nENDED(8) AT $(date) [Status=$status]"
+
 echo
 echo "Build(1):"
 cat $tmp1
@@ -142,6 +165,12 @@ echo
 echo "Build(2):"
 cat $tmp2
 echo
+echo "Build(7):"
+cat $tmp7
+echo
+echo "Build(8):"
+cat $tmp8
+echo
 echo -e "$started"
 echo -e "$ended1"
 echo -e "$ended2"
@@ -150,6 +179,7 @@ echo -e "$ended4"
 echo -e "$ended5"
 echo -e "$ended6"
 echo -e "$ended7"
+echo -e "$ended8"
 
 if [ "$ggttggg" == "" ]; then
   echo

From 4ee2863f5e39415e914864d50b77a6d6b00ac959 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 13:39:43 +0200
Subject: [PATCH 10/50] [helas] in gg_tt.mad, fix clang formatting

---
 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc | 6 ++----
 epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h                  | 2 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
index dbda544a11..15add2407a 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
@@ -37,7 +37,6 @@ namespace mg5amcGpu
 namespace mg5amcCpu
 #endif
 {
-
   //--------------------------------------------------------------------------
 
 #ifdef MGONGPUCPP_GPUIMPL
@@ -48,8 +47,8 @@ namespace mg5amcCpu
   using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
   using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
-  using NUM_ACCESS = DeviceAccessNumerators;    // non-trivial access: buffer includes all events
-  using DEN_ACCESS = DeviceAccessDenominators;  // non-trivial access: buffer includes all events
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
 #endif
 #else
   using namespace ::mg5amcCpu;
@@ -119,7 +118,6 @@ namespace mg5amcCpu
   }
 
   //--------------------------------------------------------------------------
-
 }
 
 #endif
diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
index 9a0bbb66ef..3c1bc33a2d 100644
--- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
@@ -1086,7 +1086,7 @@ namespace mg5amcCpu
   //--------------------------------------------------------------------------
 
 #endif
-  
+
   //==========================================================================
 
 } // end namespace

From 0b259a811ec9418e7018cab114f4ceb8ad2778a7 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 13:52:38 +0200
Subject: [PATCH 11/50] [helas] in gg_tt.mad, fix inlineHel=L printout in
 check_sa.cc

---
 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif

From 7fb5a254826650c2873af8e29c1780fcd35b9f39 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 14:06:35 +0200
Subject: [PATCH 12/50] [helas] in gg_tt.mad CPPProcess.cc and HelAmps_sm.h,
 move code around to ease code generation

---
 .../gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc | 12 ------------
 epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h          | 14 +++++++++++++-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
index 71c85c86e3..576ea7cb4d 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
@@ -332,18 +332,6 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-#ifdef MGONGPU_LINKER_HELAMPS
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#else
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#endif
-
       helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
index 3c1bc33a2d..574dd3755c 100644
--- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
@@ -1034,7 +1034,19 @@ namespace mg5amcCpu
 
   //==========================================================================
 
-#ifdef MGONGPU_LINKER_HELAMPS
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
 
   //--------------------------------------------------------------------------
 

From 716326c5745f83962db7acb5e4836c1293e82e66 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 14:11:54 +0200
Subject: [PATCH 13/50] [helas] in gg_tt.mad cudacpp.mk, build HelAmps.o and
 use rdc=true only in the HELINL=L mode

---
 epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
index f6e21ac493..0d8ea00f01 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -650,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -779,10 +778,13 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o

From bf676af494fda1e02e4e4802a51d11fcc6b8d4b3 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 13:46:43 +0200
Subject: [PATCH 14/50] [helas] first quick tput test of ggtt including -inlL
 option: ok for c++, a factor 3 slower for cuda...

./tput/teeThroughputX.sh -ggtt -makej -makeclean -inlLonly

diff -u --color tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt tput/logs_ggtt_mad/log_ggtt_mad_d_inlL_hrd0.txt

-Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
+Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.589473e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.164485e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.280951e+08                 )  sec^-1
-MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.528239 sec
-INFO: No Floating Point Exceptions have been reported
-     2,222,057,027      cycles                           #    2.887 GHz
-     3,171,868,018      instructions                     #    1.43  insn per cycle
-       0.826440817 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
+EvtsPerSec[Rmb+ME]     (23) = ( 2.667135e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.116115e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.251573e+07                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     0.550450 sec
+INFO: No Floating Point Exceptions have been reported
+     2,272,219,097      cycles                           #    2.889 GHz
+     3,361,475,195      instructions                     #    1.48  insn per cycle
+       0.842685843 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inlL_hrd0/check_cuda.exe -p 2048 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 190
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
---
 .../log_ggtt_mad_d_inl0_hrd0.txt              |  90 +++----
 .../log_ggtt_mad_d_inl1_hrd0.txt              |  90 +++----
 .../log_ggtt_mad_d_inlL_hrd0.txt              | 229 ++++++++++++++++++
 3 files changed, 323 insertions(+), 86 deletions(-)
 create mode 100644 epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inlL_hrd0.txt

diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
index 34e03e8fe4..9114f2e8bd 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 25s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_19:50:43
+DATE: 2024-08-28_14:28:14
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.015578e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.167678e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.279582e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.589473e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.164485e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.280951e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.520495 sec
+TOTAL       :     0.528239 sec
 INFO: No Floating Point Exceptions have been reported
-     2,215,808,169      cycles                           #    2.946 GHz                    
-     3,187,450,258      instructions                     #    1.44  insn per cycle         
-       0.809093508 seconds time elapsed
+     2,222,057,027      cycles                           #    2.887 GHz                    
+     3,171,868,018      instructions                     #    1.43  insn per cycle         
+       0.826440817 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.870302e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.920397e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.920397e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.823929e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.870912e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.870912e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.747684 sec
+TOTAL       :     5.856817 sec
 INFO: No Floating Point Exceptions have been reported
-    17,324,193,414      cycles                           #    3.009 GHz                    
-    46,060,464,647      instructions                     #    2.66  insn per cycle         
-       5.757711057 seconds time elapsed
+    17,161,452,897      cycles                           #    2.928 GHz                    
+    45,937,120,419      instructions                     #    2.68  insn per cycle         
+       5.862725573 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.256365e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.416045e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.416045e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.126989e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.279897e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.279897e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.359278 sec
+TOTAL       :     3.462372 sec
 INFO: No Floating Point Exceptions have been reported
-    10,153,117,527      cycles                           #    3.015 GHz                    
-    27,956,665,962      instructions                     #    2.75  insn per cycle         
-       3.369058986 seconds time elapsed
+    10,017,721,294      cycles                           #    2.889 GHz                    
+    27,835,306,533      instructions                     #    2.78  insn per cycle         
+       3.467900747 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.128206e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.537547e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.537547e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.028989e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.416165e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.416165e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.182924 sec
+TOTAL       :     2.192265 sec
 INFO: No Floating Point Exceptions have been reported
-     6,226,289,605      cycles                           #    2.841 GHz                    
-    12,698,897,797      instructions                     #    2.04  insn per cycle         
-       2.192278719 seconds time elapsed
+     6,072,828,199      cycles                           #    2.764 GHz                    
+    12,576,463,194      instructions                     #    2.07  insn per cycle         
+       2.197922075 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.605220e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.105851e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.105851e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.485067e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.946458e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.946458e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.009834 sec
+TOTAL       :     2.018436 sec
 INFO: No Floating Point Exceptions have been reported
-     5,688,710,640      cycles                           #    2.818 GHz                    
-    12,134,437,252      instructions                     #    2.13  insn per cycle         
-       2.019506075 seconds time elapsed
+     5,586,234,830      cycles                           #    2.761 GHz                    
+    12,014,178,237      instructions                     #    2.15  insn per cycle         
+       2.024082473 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.669310e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.868262e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.868262e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.521988e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.705669e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.705669e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.997018 sec
+TOTAL       :     3.084504 sec
 INFO: No Floating Point Exceptions have been reported
-     5,821,558,239      cycles                           #    1.938 GHz                    
-     8,411,130,761      instructions                     #    1.44  insn per cycle         
-       3.006784964 seconds time elapsed
+     5,693,114,796      cycles                           #    1.843 GHz                    
+     8,291,693,937      instructions                     #    1.46  insn per cycle         
+       3.090277932 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt
index 1d562b1c51..0bede2793b 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 18s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:08:54
+DATE: 2024-08-28_14:28:38
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.079454e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.184027e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.281167e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.586606e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.163939e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.280339e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.525003 sec
+TOTAL       :     0.525735 sec
 INFO: No Floating Point Exceptions have been reported
-     2,200,806,347      cycles                           #    2.912 GHz                    
-     3,172,188,132      instructions                     #    1.44  insn per cycle         
-       0.814200484 seconds time elapsed
+     2,232,365,149      cycles                           #    2.890 GHz                    
+     3,182,956,973      instructions                     #    1.43  insn per cycle         
+       0.829750392 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.477886e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.565553e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.565553e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.432883e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.516480e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.516480e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     4.370943 sec
+TOTAL       :     4.416414 sec
 INFO: No Floating Point Exceptions have been reported
-    13,117,582,836      cycles                           #    2.995 GHz                    
-    34,450,679,536      instructions                     #    2.63  insn per cycle         
-       4.380756610 seconds time elapsed
+    12,990,944,477      cycles                           #    2.938 GHz                    
+    34,329,027,064      instructions                     #    2.64  insn per cycle         
+       4.422282858 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  665) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.033084e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.174712e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.174712e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.972324e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.108975e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.108975e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.593818 sec
+TOTAL       :     3.636495 sec
 INFO: No Floating Point Exceptions have been reported
-    10,811,449,443      cycles                           #    3.001 GHz                    
-    24,123,594,949      instructions                     #    2.23  insn per cycle         
-       3.603506153 seconds time elapsed
+    10,687,597,744      cycles                           #    2.935 GHz                    
+    24,000,551,707      instructions                     #    2.25  insn per cycle         
+       3.642117513 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2571) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.731678e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.069353e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.069353e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.628578e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.954501e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.954501e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.354482 sec
+TOTAL       :     2.372776 sec
 INFO: No Floating Point Exceptions have been reported
-     6,707,294,523      cycles                           #    2.838 GHz                    
-    12,465,505,098      instructions                     #    1.86  insn per cycle         
-       2.364349203 seconds time elapsed
+     6,572,155,340      cycles                           #    2.764 GHz                    
+    12,342,988,553      instructions                     #    1.88  insn per cycle         
+       2.378340216 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3096) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.061977e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.447561e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.447561e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.926591e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.300605e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.300605e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.207748 sec
+TOTAL       :     2.235661 sec
 INFO: No Floating Point Exceptions have been reported
-     6,305,288,080      cycles                           #    2.845 GHz                    
-    11,685,678,996      instructions                     #    1.85  insn per cycle         
-       2.217142463 seconds time elapsed
+     6,180,900,661      cycles                           #    2.759 GHz                    
+    11,564,434,089      instructions                     #    1.87  insn per cycle         
+       2.241440916 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2640) (512y:  239) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.929117e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.157594e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.157594e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.738770e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.946470e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.946470e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.806226 sec
+TOTAL       :     2.911722 sec
 INFO: No Floating Point Exceptions have been reported
-     5,500,190,609      cycles                           #    1.954 GHz                    
-     9,401,836,893      instructions                     #    1.71  insn per cycle         
-       2.816415768 seconds time elapsed
+     5,383,828,846      cycles                           #    1.846 GHz                    
+     9,280,485,426      instructions                     #    1.72  insn per cycle         
+       2.917468081 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2084) (512y:  282) (512z: 1954)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inlL_hrd0.txt
new file mode 100644
index 0000000000..0b16978fc1
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inlL_hrd0.txt
@@ -0,0 +1,229 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 16s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+DATE: 2024-08-28_14:29:02
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inlL_hrd0/check_cuda.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 2.667135e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.116115e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.251573e+07                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     0.550450 sec
+INFO: No Floating Point Exceptions have been reported
+     2,272,219,097      cycles                           #    2.889 GHz                    
+     3,361,475,195      instructions                     #    1.48  insn per cycle         
+       0.842685843 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inlL_hrd0/check_cuda.exe -p 2048 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 190
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 2.028807e+00
+Avg ME (F77/GPU)   = 2.0288063388516822
+Relative difference = 3.2588034143755247e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.832998e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.879765e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.879765e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     5.828276 sec
+INFO: No Floating Point Exceptions have been reported
+    17,179,972,160      cycles                           #    2.946 GHz                    
+    46,155,122,198      instructions                     #    2.69  insn per cycle         
+       5.834030419 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  273) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028807e+00
+Avg ME (F77/C++)    = 2.0288063388515649
+Relative difference = 3.258803992249869e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.183851e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.339899e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.339899e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     3.400678 sec
+INFO: No Floating Point Exceptions have been reported
+    10,026,337,742      cycles                           #    2.944 GHz                    
+    28,036,704,962      instructions                     #    2.80  insn per cycle         
+       3.406380757 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  874) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028807e+00
+Avg ME (F77/C++)    = 2.0288063388515654
+Relative difference = 3.2588039900609506e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 4.970649e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.360066e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.360066e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     2.218211 sec
+INFO: No Floating Point Exceptions have been reported
+     6,090,497,709      cycles                           #    2.740 GHz                    
+    12,538,833,027      instructions                     #    2.06  insn per cycle         
+       2.223935212 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1544) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028807e+00
+Avg ME (F77/C++)    = 2.0288063388516204
+Relative difference = 3.2588037186351226e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 5.364093e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.809257e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.809257e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     2.061967 sec
+INFO: No Floating Point Exceptions have been reported
+     5,638,892,941      cycles                           #    2.729 GHz                    
+    12,020,058,201      instructions                     #    2.13  insn per cycle         
+       2.067543648 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1317) (512y:  144) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028807e+00
+Avg ME (F77/C++)    = 2.0288063388516204
+Relative difference = 3.2588037186351226e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.493455e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.675955e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.675955e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     3.109485 sec
+INFO: No Floating Point Exceptions have been reported
+     5,760,815,236      cycles                           #    1.850 GHz                    
+     8,461,440,763      instructions                     #    1.47  insn per cycle         
+       3.115293443 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1414) (512y:  122) (512z:  860)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028807e+00
+Avg ME (F77/C++)    = 2.0288063388516204
+Relative difference = 3.2588037186351226e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED

From ee84d7d4964501c78a6ad78ad0f9c6be50c8ca37 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 13:55:25 +0200
Subject: [PATCH 15/50] [helas] in CODEGEN, complete the backport from
 gg_tt.mad of file templates in HELINL=L mode

---
 .../iolibs/template_files/gpu/check_sa.cc     |  2 ++
 .../iolibs/template_files/gpu/cudacpp.mk      | 21 ++++++++++++++-----
 .../template_files/gpu/cudacpp_config.mk      |  2 +-
 .../iolibs/template_files/gpu/mgOnGpuConfig.h | 16 ++++++++++++--
 4 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk
index 78512a5eeb..e10ac49570 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h
index ee04b3b8aa..f12d231fc6 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not build HelAmps as separate object files, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20%% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {

From 4c4198fd148f4e222bbbdbe5556aa18d881e5476 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 16:05:22 +0200
Subject: [PATCH 16/50] [helas] in CODEGEN model_handling.py, complete the
 backport from gg_tt.mad of HelAmps.h in HELINL=L mode

---
 .../CUDACPP_SA_OUTPUT/model_handling.py       | 76 ++++++++++++++-----
 1 file changed, 57 insertions(+), 19 deletions(-)

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
index 98ff161c58..b72d423d0d 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
@@ -121,6 +121,12 @@ class PLUGIN_ALOHAWriter(aloha_writers.ALOHAWriterForGPU):
     ###nodeclare = False # old behaviour (separate declaration with no initialization)
     nodeclare = True # new behaviour (delayed declaration with initialisation)
 
+    # AV - modify aloha_writers.WriteALOHA method (add a debug printout)
+    def write(self, **opt):
+        ###misc.sprint('Entering PLUGIN_ALOHAWriter.write')
+        out = super().write(**opt)
+        return out
+
     # AV - modify aloha_writers.ALOHAWriterForCPP method (improve formatting)
     def change_number_format(self, number):
         """Formatting the number"""
@@ -164,7 +170,7 @@ def isinteger(x):
     # [NB: this exists in ALOHAWriterForGPU but essentially falls back to ALOHAWriterForCPP]
     # [NB: no, actually this exists twice(!) in ForGPU and the 2nd version is not trivial! but I keep the ForCPP version]
     # This affects HelAmps_sm.h and HelAmps_sm.cc
-    def get_header_txt(self, name=None, couplings=None,mode=''):
+    def get_header_txt(self, name=None, couplings=None, mode=''):
         """Define the Header of the fortran file. This include
             - function tag
             - definition of variable
@@ -175,7 +181,7 @@ def get_header_txt(self, name=None, couplings=None,mode=''):
             mode = self.mode
         out = StringIO()
         # define the type of function and argument
-        if not 'no_include' in mode:
+        if not 'no_include' in mode and not 'linker' in mode:
             out.write('#include \"%s.h\"\n\n' % self.name)
         args = []
         comment_inputs = [] # AV
@@ -207,7 +213,8 @@ def get_header_txt(self, name=None, couplings=None,mode=''):
             output = '%(doublec)s allvertexes[]' % {
                 'doublec': self.type2def['double']}
             comment_output = 'amplitude \'vertex\''
-            template = 'template<class W_ACCESS, class A_ACCESS, class C_ACCESS>'
+            template = '  template<class W_ACCESS, class A_ACCESS, class C_ACCESS>\n'
+            template_define1 = '<W_ACCESS, A_ACCESS, CD_ACCESS>'
         else:
             output = '%(doublec)s all%(spin)s%(id)d[]' % {
                      'doublec': self.type2def['double'],
@@ -215,22 +222,30 @@ def get_header_txt(self, name=None, couplings=None,mode=''):
                      'id': self.outgoing}
             ###self.declaration.add(('list_complex', output)) # AV BUG FIX - THIS IS NOT NEEDED AND IS WRONG (adds name 'cxtype_sv V3[]')
             comment_output = 'wavefunction \'%s%d[6]\'' % ( self.particles[self.outgoing -1], self.outgoing ) # AV (wavefuncsize=6)
-            template = 'template<class W_ACCESS, class C_ACCESS>'
+            template = '  template<class W_ACCESS, class C_ACCESS>\n'
+            template_define1 = '<W_ACCESS, CD_ACCESS>'
+        if 'linker' in mode: template = ''
         comment = '// Compute the output %s from the input wavefunctions %s' % ( comment_output, ', '.join(comment_inputs) ) # AV
+        if 'linker_decl' in mode : name = 'linker_' + name
         indent = ' ' * len( '  %s( ' % name )
-        out.write('  %(comment)s\n  %(template)s\n  %(prefix)s void\n  %(name)s( const %(args)s,\n%(indent)s%(output)s )%(suffix)s' %
-                  {'comment': comment, # AV - add comment
-                   'template': template, # AV - add template
-                   'prefix': self.prefix + ( ' INLINE' if 'is_h' in mode else '' ), # AV - add INLINE
-                   'suffix': ( ' ALWAYS_INLINE' if 'is_h' in mode else '' ), # AV - add ALWAYS_INLINE
-                   'indent':indent, 'output':output, 'name': name,
-                   'args': (',\n' + indent + 'const ').join(args)}) # AV - add const, add indent
-        if 'is_h' in mode:
-            out.write(';\n')
-            out.write('\n  //--------------------------------------------------------------------------\n') # AV add footer
+        if not 'linker_define' in mode :
+            out.write('  %(comment)s\n%(template)s  %(prefix)s void\n  %(name)s( const %(args)s,\n%(indent)s%(output)s )%(suffix)s' %
+                      {'comment': comment, # AV - add comment
+                       'template': template, # AV - add template
+                       'prefix': self.prefix + ( ' INLINE' if 'is_h' in mode else '' ), # AV - add INLINE
+                       'suffix': ( ' ALWAYS_INLINE' if 'is_h' in mode else '' ), # AV - add ALWAYS_INLINE
+                       'indent':indent, 'output':output, 'name': name,
+                       'args': (',\n' + indent + 'const ').join(args)}) # AV - add const, add indent
+            if 'is_h' in mode or 'linker_decl' in mode:
+                out.write(';\n')
+                out.write('\n  //--------------------------------------------------------------------------\n') # AV add footer
+            else:
+                ###out.write('\n{\n')
+                out.write('\n  {\n') # AV
+        elif 'linker_define1' in mode :
+            out.write('#define helas_%s %s%s'%(name,name,template_define1))
         else:
-            ###out.write('\n{\n')
-            out.write('\n  {\n') # AV
+            out.write('#define helas_%s linker_%s'%(name,name))
         return out.getvalue()
 
     # AV - modify aloha_writers.ALOHAWriterForCPP method (improve formatting)
@@ -1064,6 +1079,9 @@ def write_aloha_routines(self):
         # Read in the template .h and .cc files, stripped of compiler commands and namespaces
         template_h_files = self.read_aloha_template_files(ext = 'h')
         template_cc_files = self.read_aloha_template_files(ext = 'cc')
+        template_h2a_files = ['']
+        template_h2b_files = ['']
+        template_h2c_files = ['']
         aloha_model = create_aloha.AbstractALOHAModel(self.model.get('name'), explicit_combine=True)
         aloha_model.add_Lorentz_object(self.model.get('lorentz'))
         if self.wanted_lorentz:
@@ -1072,14 +1090,22 @@ def write_aloha_routines(self):
             aloha_model.compute_all(save=False, custom_propa=True)
         for abstracthelas in dict(aloha_model).values():
             print(type(abstracthelas), abstracthelas.name) # AV this is the loop on FFV functions
-            h_rout, cc_rout = abstracthelas.write(output_dir=None, language=self.aloha_writer, mode='no_include')
+            h_rout, cc_rout = abstracthelas.write(output_dir=None, language=self.aloha_writer, mode='no_include') # AV this eventually calls PLUGIN_ALOHAWriter.write
             template_h_files.append(h_rout)
             template_cc_files.append(cc_rout)
+            writer2 = aloha_writers.WriterFactory(abstracthelas, self.aloha_writer, None, abstracthelas.tag) # AV as in create_aloha.AbstractRoutine,write
+            h2a_rout = writer2.get_header_txt(mode='linker_define1')
+            h2b_rout = writer2.get_header_txt(mode='linker_define2')
+            h2c_rout = writer2.get_header_txt(mode='linker_decl')
+            template_h2a_files.append(h2a_rout)
+            template_h2b_files.append(h2b_rout)
+            template_h2c_files.append(h2c_rout)
         replace_dict['function_declarations'] = '\n'.join(template_h_files)
         replace_dict['function_definitions'] = '\n'.join(template_cc_files)
         file_h = self.read_template_file(self.aloha_template_h) % replace_dict
         file_cc = self.read_template_file(self.aloha_template_cc) % replace_dict
         file_cc = '\n'.join( file_cc.split('\n')[9:] ) # skip first 9 lines in cpp_hel_amps_cc.inc (copyright including ALOHA)
+        file_cc = file_cc[:-1] # skip the trailing empty line
         # Write the HelAmps_sm.h and HelAmps_sm.cc files
         ###PLUGIN_writers.CPPWriter(model_h_file).writelines(file_h)
         ###PLUGIN_writers.CPPWriter(model_cc_file).writelines(file_cc)
@@ -1091,9 +1117,21 @@ def write_aloha_routines(self):
         ###             os.path.split(model_cc_file)[0]))
         # Write only the HelAmps_sm.h file
         file_h_lines = file_h.split('\n')
-        file_h = '\n'.join( file_h_lines[:-3]) # skip the trailing '//---'
+        file_cc_lines = file_cc.split('\n')
+        file_h = ''
+        file_h += '\n'.join( file_h_lines[:-3]) # skip the trailing '//---'
+        file_cc = '\n'.join( file_cc_lines[:-5]) # skip the footer
+        file_cc_footer = '\n'.join( file_cc_lines[-3:]) # keep the footer (excluding one //--- separator) for later
         file_h += file_cc # append the contents of HelAmps_sm.cc directly to HelAmps_sm.h!
-        file_h = file_h[:-1] # skip the trailing empty line
+        file_h += '\n  //==========================================================================\n'
+        file_h += '\n#ifndef MGONGPU_LINKER_HELAMPS\n'
+        file_h += '\n'.join(template_h2a_files)
+        file_h += '\n\n#else\n'
+        file_h += '\n'.join(template_h2b_files)
+        file_h += '\n\n  //--------------------------------------------------------------------------\n'
+        file_h += '\n'.join(template_h2c_files)
+        file_h += '\n#endif\n\n  //==========================================================================\n\n'
+        file_h = file_h + file_cc_footer # add the footer
         PLUGIN_writers.CPPWriter(model_h_file).writelines(file_h)
         logger.info('Created file %s in directory %s' \
                     % (os.path.split(model_h_file)[-1], os.path.split(model_h_file)[0] ) )

From ae7d18b8643c56c4f3c3f25d8791bf9871b45783 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 18:44:34 +0200
Subject: [PATCH 17/50] [helas] in CODEGEN model_handling.py, complete the
 backport from gg_tt.mad of CPPProcess.cc in HELINL=L mode

---
 .../PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py        | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
index b72d423d0d..945de526be 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
@@ -1365,7 +1365,9 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name):
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -1377,7 +1379,9 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name):
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -2179,10 +2183,9 @@ def generate_helas_call(self, argument):
             if usesdepcoupl is None: raise Exception('PANIC! could not determine if this call uses aS-dependent or aS-independent couplings?')
             elif usesdepcoupl: caccess = 'CD_ACCESS'
             else: caccess = 'CI_ACCESS'
-            ###if arg['routine_name'].endswith( '_0' ) : arg['routine_name'] += '<W_ACCESS, A_ACCESS, C_ACCESS>'
-            ###else : arg['routine_name'] += '<W_ACCESS, C_ACCESS>'
-            if arg['routine_name'].endswith( '_0' ) : arg['routine_name'] += '<W_ACCESS, A_ACCESS, %s>'%caccess
-            else : arg['routine_name'] += '<W_ACCESS, %s>'%caccess
+            ###if arg['routine_name'].endswith( '_0' ) : arg['routine_name'] += '<W_ACCESS, A_ACCESS, %s>'%caccess
+            ###else : arg['routine_name'] += '<W_ACCESS, %s>'%caccess
+            arg['routine_name'] = 'helas_' + arg['routine_name']
             if isinstance(argument, helas_objects.HelasWavefunction):
                 #arg['out'] = 'w_sv[%(out)d]'
                 arg['out'] = 'w_fp[%(out)d]'

From a58cc9c0d4744503f81ab2cc245df2fd9b996cd8 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 18:53:13 +0200
Subject: [PATCH 18/50] [helas] in gg_tt.mad, move HelAmps.cc to SubProcesses
 and link it in P* (the source is the same but it must be compiled in each P*
 separately)

---
 .../cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc | 123 +++++++++++++++++
 .../SubProcesses/P1_gg_ttx/HelAmps.cc         | 124 +-----------------
 2 files changed, 124 insertions(+), 123 deletions(-)
 create mode 100644 epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc
 mode change 100644 => 120000 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..15add2407a
--- /dev/null
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,123 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// Helicity amplitudes for calculating the matrix elements for
+// Process: g g > t t~ WEIGHTED<=2 @1
+// -----------------------------------------------------------------------------
+// *** NB: this implementation class depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this class is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+//#include "coloramps.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+
+#endif
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
deleted file mode 100644
index 15add2407a..0000000000
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-// Copyright (C) 2020-2024 CERN and UCLouvain.
-// Licensed under the GNU Lesser General Public License (version 3 or later).
-// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
-// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
-
-#ifdef MGONGPU_LINKER_HELAMPS
-
-#include "HelAmps_sm.h"
-
-// -----------------------------------------------------------------------------
-// Helicity amplitudes for calculating the matrix elements for
-// Process: g g > t t~ WEIGHTED<=2 @1
-// -----------------------------------------------------------------------------
-// *** NB: this implementation class depends on MemoryAccessMomenta,
-// *** where the AOSOA definition depends on CPPProcess::npar,
-// *** which may be different in different P* subprocess directories:
-// *** therefore this class is presently hosted and compiled in each P*
-// -----------------------------------------------------------------------------
-
-#include "MemoryAccessAmplitudes.h"
-#include "MemoryAccessCouplings.h"
-#include "MemoryAccessCouplingsFixed.h"
-#include "MemoryAccessGs.h"
-#include "MemoryAccessMatrixElements.h"
-#include "MemoryAccessMomenta.h"
-#include "MemoryAccessWavefunctions.h"
-
-#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
-#include "MemoryAccessDenominators.h"
-#include "MemoryAccessNumerators.h"
-//#include "coloramps.h"
-#endif
-
-#ifdef MGONGPUCPP_GPUIMPL
-namespace mg5amcGpu
-#else
-namespace mg5amcCpu
-#endif
-{
-  //--------------------------------------------------------------------------
-
-#ifdef MGONGPUCPP_GPUIMPL
-  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
-  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
-  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
-  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
-#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
-  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
-  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
-#endif
-#else
-  using namespace ::mg5amcCpu;
-  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
-  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
-  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
-  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
-  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
-#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
-  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
-  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
-#endif
-#endif
-
-  //--------------------------------------------------------------------------
-
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
-  __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
-  {
-    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
-  }
-
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
-  __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
-  {
-    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
-  }
-
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
-  __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
-  {
-    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
-  }
-
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
-  __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
-  {
-    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
-  }
-
-  //--------------------------------------------------------------------------
-}
-
-#endif
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file

From 64875e73bd9919df73bbbc9d758445d925d911eb Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 19:03:15 +0200
Subject: [PATCH 19/50] [helas] in CODEGEN and gg_tt.mad, fix HelAmps.cc in
 HELINL=L mode and complete its backport

---
 .../template_files/gpu/cpp_hel_amps_cc2.inc   | 65 +++++++++++++++++++
 .../CUDACPP_SA_OUTPUT/model_handling.py       | 49 ++++++++++----
 .../cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc | 11 ++--
 3 files changed, 109 insertions(+), 16 deletions(-)
 create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cpp_hel_amps_cc2.inc

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cpp_hel_amps_cc2.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cpp_hel_amps_cc2.inc
new file mode 100644
index 0000000000..a372da36a9
--- /dev/null
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cpp_hel_amps_cc2.inc
@@ -0,0 +1,65 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation class depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this class is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+%(function_definitions2)s}
+#endif
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
index 945de526be..2327596bf4 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
@@ -184,6 +184,7 @@ def get_header_txt(self, name=None, couplings=None, mode=''):
         if not 'no_include' in mode and not 'linker' in mode:
             out.write('#include \"%s.h\"\n\n' % self.name)
         args = []
+        argnames = []
         comment_inputs = [] # AV
         for format, argname in self.define_argument_list(couplings):
             if format.startswith('list'):
@@ -204,14 +205,15 @@ def get_header_txt(self, name=None, couplings=None, mode=''):
                 point = self.type2def['pointer_coup']
                 args.append('%s %s%s%s'% (type, point, argname, list_arg))
                 args.append('double Ccoeff%s'% argname[7:]) # OM for 'unary minus' #628
+                argnames.append(argname)
+                argnames.append('Ccoeff%s'% argname[7:])
             else:
                 args.append('%s %s%s'% (type, argname, list_arg))
+                argnames.append(argname)
         if not self.offshell:
-            ###output = '%(doublec)s%(pointer_vertex)s allvertexes' % {
-            ###    'doublec': self.type2def['double'],
-            ###    'pointer_vertex': self.type2def['pointer_vertex']}
             output = '%(doublec)s allvertexes[]' % {
                 'doublec': self.type2def['double']}
+            outputname = 'allvertexes'
             comment_output = 'amplitude \'vertex\''
             template = '  template<class W_ACCESS, class A_ACCESS, class C_ACCESS>\n'
             template_define1 = '<W_ACCESS, A_ACCESS, CD_ACCESS>'
@@ -220,27 +222,39 @@ def get_header_txt(self, name=None, couplings=None, mode=''):
                      'doublec': self.type2def['double'],
                      'spin': self.particles[self.outgoing -1],
                      'id': self.outgoing}
+            outputname = 'all%(spin)s%(id)d' % {
+                     'spin': self.particles[self.outgoing -1],
+                     'id': self.outgoing}
             ###self.declaration.add(('list_complex', output)) # AV BUG FIX - THIS IS NOT NEEDED AND IS WRONG (adds name 'cxtype_sv V3[]')
             comment_output = 'wavefunction \'%s%d[6]\'' % ( self.particles[self.outgoing -1], self.outgoing ) # AV (wavefuncsize=6)
             template = '  template<class W_ACCESS, class C_ACCESS>\n'
             template_define1 = '<W_ACCESS, CD_ACCESS>'
         if 'linker' in mode: template = ''
         comment = '// Compute the output %s from the input wavefunctions %s' % ( comment_output, ', '.join(comment_inputs) ) # AV
-        if 'linker_decl' in mode : name = 'linker_' + name
-        indent = ' ' * len( '  %s( ' % name )
+        if 'linker_decl' in mode or 'linker_impl' in mode : name2 = 'linker_' + name
+        else: name2 = name
+        indent = ' ' * len( '  %s( ' % name2 )
         if not 'linker_define' in mode :
             out.write('  %(comment)s\n%(template)s  %(prefix)s void\n  %(name)s( const %(args)s,\n%(indent)s%(output)s )%(suffix)s' %
                       {'comment': comment, # AV - add comment
                        'template': template, # AV - add template
                        'prefix': self.prefix + ( ' INLINE' if 'is_h' in mode else '' ), # AV - add INLINE
                        'suffix': ( ' ALWAYS_INLINE' if 'is_h' in mode else '' ), # AV - add ALWAYS_INLINE
-                       'indent':indent, 'output':output, 'name': name,
+                       'indent':indent, 'output':output, 'name': name2,
                        'args': (',\n' + indent + 'const ').join(args)}) # AV - add const, add indent
             if 'is_h' in mode or 'linker_decl' in mode:
                 out.write(';\n')
                 out.write('\n  //--------------------------------------------------------------------------\n') # AV add footer
+            elif 'linker_impl' in mode:
+                out.write('\n  {\n')
+                out.write('    return %(name)s%(template)s( %(args)s, %(output)s );' %
+                          {'name': name,
+                           'template': template_define1,
+                           'output': outputname, 
+                           'args': ', '.join(argnames)})
+                out.write('\n  }\n')
+                out.write('\n  //--------------------------------------------------------------------------\n') # AV add footer
             else:
-                ###out.write('\n{\n')
                 out.write('\n  {\n') # AV
         elif 'linker_define1' in mode :
             out.write('#define helas_%s %s%s'%(name,name,template_define1))
@@ -659,6 +673,7 @@ class PLUGIN_UFOModelConverter(PLUGIN_export_cpp.UFOModelConverterGPU):
     ###cc_ext = 'cu'
     ###aloha_template_h = pjoin('gpu','cpp_hel_amps_h.inc')
     ###aloha_template_cc = pjoin('gpu','cpp_hel_amps_cc.inc')
+    aloha_template_cc2 = pjoin('gpu','cpp_hel_amps_cc2.inc')
     ###helas_h = pjoin('gpu', 'helas.h')
     ###helas_cc = pjoin('gpu', 'helas.cu')
 
@@ -679,7 +694,7 @@ def read_aloha_template_files(self, ext):
         out.append( file )
         return out
 
-    # AV - use the plugin's PLUGIN_OneProcessExporter template_path and __template_path (for aloha_template_h/cc)
+    # AV - use the plugin's PLUGIN_OneProcessExporter template_path and __template_path (for aloha_template_h/cc/cc2)
     @classmethod
     def read_template_file(cls, filename, classpath=False):
         """Open a template file and return the contents."""
@@ -1069,8 +1084,10 @@ def write_aloha_routines(self):
             os.makedirs(os.path.join(self.dir_path, self.cc_file_dir))
         model_h_file = os.path.join(self.dir_path, self.include_dir,
                                     'HelAmps_%s.h' % self.model_name)
-        model_cc_file = os.path.join(self.dir_path, self.cc_file_dir,
-                                     'HelAmps_%s.%s' % (self.model_name, self.cc_ext))
+        ###model_cc_file = os.path.join(self.dir_path, self.cc_file_dir,
+        ###                             'HelAmps_%s.%s' % (self.model_name, self.cc_ext))
+        model_cc2_file = os.path.join(self.dir_path+'/../SubProcesses', self.cc_file_dir,
+                                      'HelAmps.%s' % (self.cc_ext))
         replace_dict = {}
         replace_dict['output_name'] = self.output_name
         replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines()
@@ -1082,6 +1099,7 @@ def write_aloha_routines(self):
         template_h2a_files = ['']
         template_h2b_files = ['']
         template_h2c_files = ['']
+        template_cc2_files = ['']
         aloha_model = create_aloha.AbstractALOHAModel(self.model.get('name'), explicit_combine=True)
         aloha_model.add_Lorentz_object(self.model.get('lorentz'))
         if self.wanted_lorentz:
@@ -1100,10 +1118,14 @@ def write_aloha_routines(self):
             template_h2a_files.append(h2a_rout)
             template_h2b_files.append(h2b_rout)
             template_h2c_files.append(h2c_rout)
+            cc2_rout = writer2.get_header_txt(mode='linker_impl')
+            template_cc2_files.append(cc2_rout)
         replace_dict['function_declarations'] = '\n'.join(template_h_files)
         replace_dict['function_definitions'] = '\n'.join(template_cc_files)
+        replace_dict['function_definitions2'] = '\n'.join(template_cc2_files)
         file_h = self.read_template_file(self.aloha_template_h) % replace_dict
         file_cc = self.read_template_file(self.aloha_template_cc) % replace_dict
+        file_cc2 = self.read_template_file(self.aloha_template_cc2) % replace_dict
         file_cc = '\n'.join( file_cc.split('\n')[9:] ) # skip first 9 lines in cpp_hel_amps_cc.inc (copyright including ALOHA)
         file_cc = file_cc[:-1] # skip the trailing empty line
         # Write the HelAmps_sm.h and HelAmps_sm.cc files
@@ -1135,6 +1157,11 @@ def write_aloha_routines(self):
         PLUGIN_writers.CPPWriter(model_h_file).writelines(file_h)
         logger.info('Created file %s in directory %s' \
                     % (os.path.split(model_h_file)[-1], os.path.split(model_h_file)[0] ) )
+        file_cc2_lines = file_cc2.split('\n')
+        file_cc2 = '\n'.join( file_cc2_lines[:-1]) # skip the last empty trailing line
+        PLUGIN_writers.CPPWriter(model_cc2_file).writelines(file_cc2)
+        logger.info('Created file %s in directory %s' \
+                    % (os.path.split(model_cc2_file)[-1], os.path.split(model_cc2_file)[0] ) )
 
     def prepare_couplings(self, wanted_couplings = []):
         super().prepare_couplings(wanted_couplings)
@@ -1475,10 +1502,10 @@ def generate_process_files(self):
         self.edit_check_sa()
         self.edit_mgonGPU()
         self.edit_processidfile() # AV new file (NB this is Sigma-specific, should not be a symlink to Subprocesses)
-        
         self.edit_testxxx() # AV new file (NB this is generic in Subprocesses and then linked in Sigma-specific)
         self.edit_memorybuffers() # AV new file (NB this is generic in Subprocesses and then linked in Sigma-specific)
         self.edit_memoryaccesscouplings() # AV new file (NB this is generic in Subprocesses and then linked in Sigma-specific)
+        files.ln(pjoin(self.path+'/..', 'HelAmps.cc'), self.path, 'HelAmps.cc')
         # NB: symlink of cudacpp.mk to makefile is overwritten by madevent makefile if this exists (#480)
         # NB: this relies on the assumption that cudacpp code is generated before madevent code
         files.ln(pjoin(self.path, 'cudacpp.mk'), self.path, 'makefile')
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc
index 15add2407a..79486e92f0 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc
@@ -7,9 +7,6 @@
 
 #include "HelAmps_sm.h"
 
-// -----------------------------------------------------------------------------
-// Helicity amplitudes for calculating the matrix elements for
-// Process: g g > t t~ WEIGHTED<=2 @1
 // -----------------------------------------------------------------------------
 // *** NB: this implementation class depends on MemoryAccessMomenta,
 // *** where the AOSOA definition depends on CPPProcess::npar,
@@ -28,7 +25,6 @@
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
 #include "MemoryAccessDenominators.h"
 #include "MemoryAccessNumerators.h"
-//#include "coloramps.h"
 #endif
 
 #ifdef MGONGPUCPP_GPUIMPL
@@ -79,6 +75,8 @@ namespace mg5amcCpu
     return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
+  //--------------------------------------------------------------------------
+
   // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
   __device__ void
   linker_FFV1_0( const fptype allF1[],
@@ -91,6 +89,8 @@ namespace mg5amcCpu
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
+  //--------------------------------------------------------------------------
+
   // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
   __device__ void
   linker_FFV1_1( const fptype allF2[],
@@ -104,6 +104,8 @@ namespace mg5amcCpu
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
+  //--------------------------------------------------------------------------
+
   // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
   __device__ void
   linker_FFV1_2( const fptype allF1[],
@@ -119,5 +121,4 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 }
-
 #endif

From 9f1cfd2ce5b5a695e6d55fd327ade87520d035a9 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 19:49:46 +0200
Subject: [PATCH 20/50] [helas] regenerate gg_tt.mad, check that all is ok
 (codegen for HELINL=L is complete)

---
 .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt       | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
index b7616fe096..ce00b9360a 100644
--- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005777120590209961 [0m
+[1;32mDEBUG: model prefixing  takes 0.005608797073364258 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.008 s
+1 processes with 3 diagrams generated in 0.009 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fa5a393fd30> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f72ca441dc0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -194,25 +194,27 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1614][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.115 s
+Wrote files for 10 helas calls in 0.121 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.146 s
+ALOHA: aloha creates 2 routines in  0.150 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.132 s
+ALOHA: aloha creates 4 routines in  0.137 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h
@@ -241,9 +243,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m1.927s
-user	0m1.671s
-sys	0m0.252s
+real	0m1.991s
+user	0m1.720s
+sys	0m0.272s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *

From 5ca9d2d81c698341cd113a29e7ce96d63c4ff17e Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 19:56:00 +0200
Subject: [PATCH 21/50] [helas] regenerate all processes with support for
 HELINL=L

---
 .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt   |   32 +-
 .../ee_mumu.mad/SubProcesses/MemoryAccessGs.h |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P1_epem_mupmum/CPPProcess.cc |   12 +-
 .../SubProcesses/P1_epem_mupmum/check_sa.cc   |    2 +
 .../ee_mumu.mad/SubProcesses/cudacpp.mk       |   21 +-
 epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h   |  120 +
 .../cudacpp/ee_mumu.mad/src/cudacpp_config.mk |    2 +-
 .../cudacpp/ee_mumu.mad/src/mgOnGpuConfig.h   |   16 +-
 .../CODEGEN_cudacpp_ee_mumu_log.txt           |   14 +-
 .../ee_mumu.sa/SubProcesses/MemoryAccessGs.h  |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc     |   12 +-
 .../P1_Sigma_sm_epem_mupmum/check_sa.cc       |    2 +
 .../ee_mumu.sa/SubProcesses/cudacpp.mk        |   21 +-
 epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h    |  120 +
 .../cudacpp/ee_mumu.sa/src/cudacpp_config.mk  |    2 +-
 epochX/cudacpp/ee_mumu.sa/src/mgOnGpuConfig.h |   16 +-
 .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt       |   14 +-
 .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt    |   14 +-
 .../gg_tt.sa/SubProcesses/MemoryAccessGs.h    |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../P1_Sigma_sm_gg_ttx/CPPProcess.cc          |   16 +-
 .../P1_Sigma_sm_gg_ttx/check_sa.cc            |    2 +
 .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk  |   21 +-
 epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h      |   67 +
 epochX/cudacpp/gg_tt.sa/src/cudacpp_config.mk |    2 +-
 epochX/cudacpp/gg_tt.sa/src/mgOnGpuConfig.h   |   16 +-
 .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt |   47 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P1_gg_ttx/CPPProcess.cc      |   16 +-
 .../SubProcesses/P1_gg_ttx/check_sa.cc        |    2 +
 .../SubProcesses/P2_gg_ttxg/CPPProcess.cc     |   66 +-
 .../SubProcesses/P2_gg_ttxg/check_sa.cc       |    2 +
 .../gg_tt01g.mad/SubProcesses/cudacpp.mk      |   21 +-
 epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h  |  139 +
 .../gg_tt01g.mad/src/cudacpp_config.mk        |    2 +-
 .../cudacpp/gg_tt01g.mad/src/mgOnGpuConfig.h  |   16 +-
 .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt     |   35 +-
 .../gg_ttg.mad/SubProcesses/MemoryAccessGs.h  |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P1_gg_ttxg/CPPProcess.cc     |   66 +-
 .../SubProcesses/P1_gg_ttxg/check_sa.cc       |    2 +
 .../gg_ttg.mad/SubProcesses/cudacpp.mk        |   21 +-
 epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h    |  139 +
 .../cudacpp/gg_ttg.mad/src/cudacpp_config.mk  |    2 +-
 epochX/cudacpp/gg_ttg.mad/src/mgOnGpuConfig.h |   16 +-
 .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt  |   16 +-
 .../gg_ttg.sa/SubProcesses/MemoryAccessGs.h   |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc         |   66 +-
 .../P1_Sigma_sm_gg_ttxg/check_sa.cc           |    2 +
 .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk |   21 +-
 epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h     |  139 +
 .../cudacpp/gg_ttg.sa/src/cudacpp_config.mk   |    2 +-
 epochX/cudacpp/gg_ttg.sa/src/mgOnGpuConfig.h  |   16 +-
 .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt   |   35 +-
 .../gg_ttgg.mad/SubProcesses/MemoryAccessGs.h |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P1_gg_ttxgg/CPPProcess.cc    |  436 +-
 .../SubProcesses/P1_gg_ttxgg/check_sa.cc      |    2 +
 .../gg_ttgg.mad/SubProcesses/cudacpp.mk       |   21 +-
 epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h   |  181 +
 .../cudacpp/gg_ttgg.mad/src/cudacpp_config.mk |    2 +-
 .../cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h   |   16 +-
 .../CODEGEN_cudacpp_gg_ttgg_log.txt           |   16 +-
 .../gg_ttgg.sa/SubProcesses/MemoryAccessGs.h  |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc        |  436 +-
 .../P1_Sigma_sm_gg_ttxgg/check_sa.cc          |    2 +
 .../gg_ttgg.sa/SubProcesses/cudacpp.mk        |   21 +-
 epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h    |  181 +
 .../cudacpp/gg_ttgg.sa/src/cudacpp_config.mk  |    2 +-
 epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuConfig.h |   16 +-
 .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt |   37 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P1_gg_ttxggg/CPPProcess.cc   | 4552 +++++++++--------
 .../SubProcesses/P1_gg_ttxggg/check_sa.cc     |    2 +
 .../gg_ttggg.mad/SubProcesses/cudacpp.mk      |   21 +-
 epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h  |  181 +
 .../gg_ttggg.mad/src/cudacpp_config.mk        |    2 +-
 .../cudacpp/gg_ttggg.mad/src/mgOnGpuConfig.h  |   16 +-
 .../CODEGEN_cudacpp_gg_ttggg_log.txt          |   18 +-
 .../gg_ttggg.sa/SubProcesses/MemoryAccessGs.h |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc       | 4552 +++++++++--------
 .../P1_Sigma_sm_gg_ttxggg/check_sa.cc         |    2 +
 .../gg_ttggg.sa/SubProcesses/cudacpp.mk       |   21 +-
 epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h   |  181 +
 .../cudacpp/gg_ttggg.sa/src/cudacpp_config.mk |    2 +-
 .../cudacpp/gg_ttggg.sa/src/mgOnGpuConfig.h   |   16 +-
 .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt     |   52 +-
 .../gq_ttq.mad/SubProcesses/MemoryAccessGs.h  |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P1_gu_ttxu/CPPProcess.cc     |   26 +-
 .../SubProcesses/P1_gu_ttxu/check_sa.cc       |    2 +
 .../SubProcesses/P1_gux_ttxux/CPPProcess.cc   |   26 +-
 .../SubProcesses/P1_gux_ttxux/check_sa.cc     |    2 +
 .../gq_ttq.mad/SubProcesses/cudacpp.mk        |   21 +-
 epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h    |   80 +
 .../cudacpp/gq_ttq.mad/src/cudacpp_config.mk  |    2 +-
 epochX/cudacpp/gq_ttq.mad/src/mgOnGpuConfig.h |   16 +-
 .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt  |   16 +-
 .../gq_ttq.sa/SubProcesses/MemoryAccessGs.h   |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../P1_Sigma_sm_gu_ttxu/CPPProcess.cc         |   26 +-
 .../P1_Sigma_sm_gu_ttxu/check_sa.cc           |    2 +
 .../P1_Sigma_sm_gux_ttxux/CPPProcess.cc       |   26 +-
 .../P1_Sigma_sm_gux_ttxux/check_sa.cc         |    2 +
 .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk |   21 +-
 epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h     |   80 +
 .../cudacpp/gq_ttq.sa/src/cudacpp_config.mk   |    2 +-
 epochX/cudacpp/gq_ttq.sa/src/mgOnGpuConfig.h  |   16 +-
 .../CODEGEN_mad_heft_gg_bb_log.txt            |   26 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P1_gg_bbx/CPPProcess.cc      |   20 +-
 .../SubProcesses/P1_gg_bbx/check_sa.cc        |    2 +
 .../heft_gg_bb.mad/SubProcesses/cudacpp.mk    |   21 +-
 .../cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h |   94 +
 .../heft_gg_bb.mad/src/cudacpp_config.mk      |    2 +-
 .../heft_gg_bb.mad/src/mgOnGpuConfig.h        |   16 +-
 .../CODEGEN_cudacpp_heft_gg_bb_log.txt        |   10 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../P1_Sigma_heft_gg_bbx/CPPProcess.cc        |   20 +-
 .../P1_Sigma_heft_gg_bbx/check_sa.cc          |    2 +
 .../heft_gg_bb.sa/SubProcesses/cudacpp.mk     |   21 +-
 .../cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h  |   94 +
 .../heft_gg_bb.sa/src/cudacpp_config.mk       |    2 +-
 .../cudacpp/heft_gg_bb.sa/src/mgOnGpuConfig.h |   16 +-
 .../CODEGEN_mad_pp_tt012j_log.txt             |  289 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P0_gg_ttx/CPPProcess.cc      |   16 +-
 .../SubProcesses/P0_gg_ttx/check_sa.cc        |    2 +
 .../SubProcesses/P0_uux_ttx/CPPProcess.cc     |    8 +-
 .../SubProcesses/P0_uux_ttx/check_sa.cc       |    2 +
 .../SubProcesses/P1_gg_ttxg/CPPProcess.cc     |   66 +-
 .../SubProcesses/P1_gg_ttxg/check_sa.cc       |    2 +
 .../SubProcesses/P1_gu_ttxu/CPPProcess.cc     |   26 +-
 .../SubProcesses/P1_gu_ttxu/check_sa.cc       |    2 +
 .../SubProcesses/P1_gux_ttxux/CPPProcess.cc   |   26 +-
 .../SubProcesses/P1_gux_ttxux/check_sa.cc     |    2 +
 .../SubProcesses/P1_uux_ttxg/CPPProcess.cc    |   26 +-
 .../SubProcesses/P1_uux_ttxg/check_sa.cc      |    2 +
 .../SubProcesses/P2_gg_ttxgg/CPPProcess.cc    |  436 +-
 .../SubProcesses/P2_gg_ttxgg/check_sa.cc      |    2 +
 .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc   |  138 +-
 .../SubProcesses/P2_gg_ttxuux/check_sa.cc     |    2 +
 .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc    |  138 +-
 .../SubProcesses/P2_gu_ttxgu/check_sa.cc      |    2 +
 .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc  |  138 +-
 .../SubProcesses/P2_gux_ttxgux/check_sa.cc    |    2 +
 .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc    |   36 +-
 .../SubProcesses/P2_uc_ttxuc/check_sa.cc      |    2 +
 .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc  |   36 +-
 .../SubProcesses/P2_ucx_ttxucx/check_sa.cc    |    2 +
 .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc    |   64 +-
 .../SubProcesses/P2_uu_ttxuu/check_sa.cc      |    2 +
 .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc  |   36 +-
 .../SubProcesses/P2_uux_ttxccx/check_sa.cc    |    2 +
 .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc   |  138 +-
 .../SubProcesses/P2_uux_ttxgg/check_sa.cc     |    2 +
 .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc  |   64 +-
 .../SubProcesses/P2_uux_ttxuux/check_sa.cc    |    2 +
 .../P2_uxcx_ttxuxcx/CPPProcess.cc             |   36 +-
 .../SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc  |    2 +
 .../P2_uxux_ttxuxux/CPPProcess.cc             |   64 +-
 .../SubProcesses/P2_uxux_ttxuxux/check_sa.cc  |    2 +
 .../pp_tt012j.mad/SubProcesses/cudacpp.mk     |   21 +-
 epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h |  181 +
 .../pp_tt012j.mad/src/cudacpp_config.mk       |    2 +-
 .../cudacpp/pp_tt012j.mad/src/mgOnGpuConfig.h |   16 +-
 .../CODEGEN_mad_smeft_gg_tttt_log.txt         |   35 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P1_gg_ttxttx/CPPProcess.cc   |  230 +-
 .../SubProcesses/P1_gg_ttxttx/check_sa.cc     |    2 +
 .../smeft_gg_tttt.mad/SubProcesses/cudacpp.mk |   21 +-
 .../HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h    |  136 +
 .../smeft_gg_tttt.mad/src/cudacpp_config.mk   |    2 +-
 .../smeft_gg_tttt.mad/src/mgOnGpuConfig.h     |   16 +-
 .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt     |   18 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../CPPProcess.cc                             |  230 +-
 .../check_sa.cc                               |    2 +
 .../smeft_gg_tttt.sa/SubProcesses/cudacpp.mk  |   21 +-
 .../HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h    |  136 +
 .../smeft_gg_tttt.sa/src/cudacpp_config.mk    |    2 +-
 .../smeft_gg_tttt.sa/src/mgOnGpuConfig.h      |   16 +-
 .../CODEGEN_mad_susy_gg_t1t1_log.txt          |   33 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P1_gg_t1t1x/CPPProcess.cc    |   28 +-
 .../SubProcesses/P1_gg_t1t1x/check_sa.cc      |    2 +
 .../susy_gg_t1t1.mad/SubProcesses/cudacpp.mk  |   21 +-
 .../susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h |   81 +
 .../susy_gg_t1t1.mad/src/cudacpp_config.mk    |    2 +-
 .../susy_gg_t1t1.mad/src/mgOnGpuConfig.h      |   16 +-
 .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt      |   12 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../CPPProcess.cc                             |   28 +-
 .../P1_Sigma_MSSM_SLHA2_gg_t1t1x/check_sa.cc  |    2 +
 .../susy_gg_t1t1.sa/SubProcesses/cudacpp.mk   |   21 +-
 .../susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h  |   81 +
 .../susy_gg_t1t1.sa/src/cudacpp_config.mk     |    2 +-
 .../susy_gg_t1t1.sa/src/mgOnGpuConfig.h       |   16 +-
 .../CODEGEN_mad_susy_gg_tt_log.txt            |   28 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../SubProcesses/P1_gg_ttx/CPPProcess.cc      |   16 +-
 .../SubProcesses/P1_gg_ttx/check_sa.cc        |    2 +
 .../susy_gg_tt.mad/SubProcesses/cudacpp.mk    |   21 +-
 .../susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h   |   67 +
 .../susy_gg_tt.mad/src/cudacpp_config.mk      |    2 +-
 .../susy_gg_tt.mad/src/mgOnGpuConfig.h        |   16 +-
 .../CODEGEN_cudacpp_susy_gg_tt_log.txt        |   12 +-
 .../SubProcesses/MemoryAccessGs.h             |    2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |    2 +-
 .../P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc  |   16 +-
 .../P1_Sigma_MSSM_SLHA2_gg_ttx/check_sa.cc    |    2 +
 .../susy_gg_tt.sa/SubProcesses/cudacpp.mk     |   21 +-
 .../susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h    |   67 +
 .../susy_gg_tt.sa/src/cudacpp_config.mk       |    2 +-
 .../cudacpp/susy_gg_tt.sa/src/mgOnGpuConfig.h |   16 +-
 230 files changed, 9996 insertions(+), 6757 deletions(-)

diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
index f059e68f5e..0676f497e9 100644
--- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
@@ -62,7 +62,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005307912826538086 [0m
+[1;32mDEBUG: model prefixing  takes 0.0057065486907958984 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes.
 INFO: Please specify coupling orders to bypass this step. 
 INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1  
 INFO: Process has 2 diagrams 
-1 processes with 2 diagrams generated in 0.004 s
+1 processes with 2 diagrams generated in 0.005 s
 Total: 1 processes with 2 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -176,8 +176,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Processing color information for process: e+ e- > mu+ mu- @1 
 INFO: Creating files in directory P1_epem_mupmum 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f09ed66e490> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fa457e6f550> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -194,22 +194,22 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group epem_mupmum 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1614][0m [0m
 Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
-Wrote files for 8 helas calls in 0.112 s
+Wrote files for 8 helas calls in 0.119 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
-ALOHA: aloha creates 3 routines in  0.198 s
+ALOHA: aloha creates 3 routines in  0.212 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 7 routines in  0.253 s
+ALOHA: aloha creates 7 routines in  0.272 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -220,6 +220,8 @@ ALOHA: aloha creates 7 routines in  0.253 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h
@@ -242,9 +244,7 @@ patching file auto_dsig1.f
 Hunk #1 succeeded at 496 (offset 12 lines).
 patching file driver.f
 patching file matrix1.f
-Hunk #3 succeeded at 230 (offset 9 lines).
-Hunk #4 succeeded at 267 (offset 18 lines).
-Hunk #5 succeeded at 312 (offset 18 lines).
+Hunk #2 succeeded at 229 (offset 9 lines).
 [1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done.
 Type "launch" to generate events from this process, or see
@@ -252,9 +252,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.067s
-user	0m1.807s
-sys	0m0.251s
+real	0m2.169s
+user	0m1.894s
+sys	0m0.268s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc
index f9893b8c40..284269a9d8 100644
--- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,10 +332,10 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 );
 
-      FFV1P0_3<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[0], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[4] );
+      helas_FFV1P0_3( w_fp[1], w_fp[0], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -341,10 +345,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 2 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV2_4_3<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[0], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV2_4_3( w_fp[1], w_fp[0], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, &amp_fp[0] );
+      helas_FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L) # HELINL=L: HelAmps are compiled as separate object files and resolved by the linker
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
index 74d9a80c1a..a392267d1b 100644
--- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
@@ -1207,8 +1207,128 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS> // NB: CI_ACCESS (independent couplings), as in the explicit template calls in CPPProcess.cc that these macros replace
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_FFV2_0 FFV2_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_FFV2_3 FFV2_3<W_ACCESS, CI_ACCESS>
+#define helas_FFV4_0 FFV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_FFV4_3 FFV4_3<W_ACCESS, CI_ACCESS>
+#define helas_FFV2_4_0 FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_FFV2_4_3 FFV2_4_3<W_ACCESS, CI_ACCESS>
+
+#else
+
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_FFV2_0 linker_FFV2_0
+#define helas_FFV2_3 linker_FFV2_3
+#define helas_FFV4_0 linker_FFV4_0
+#define helas_FFV4_3 linker_FFV4_3
+#define helas_FFV2_4_0 linker_FFV2_4_0
+#define helas_FFV2_4_3 linker_FFV2_4_3
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV2_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV2_3( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV4_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV4_3( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (two couplings, as passed by the helas_FFV2_4_0 calls; the out-of-line definition must use the same parameter list)
+  __device__ void
+  linker_FFV2_4_0( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allV3[],
+                   const fptype allCOUP1[], const double Ccoeff1,
+                   const fptype allCOUP2[], const double Ccoeff2,
+                   fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (two couplings, as passed by the helas_FFV2_4_3 calls; the out-of-line definition must use the same parameter list)
+  __device__ void
+  linker_FFV2_4_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP1[], const double Ccoeff1,
+                   const fptype allCOUP2[], const double Ccoeff2,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/ee_mumu.mad/src/cudacpp_config.mk b/epochX/cudacpp/ee_mumu.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/ee_mumu.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/ee_mumu.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not compile and link them separately, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps): the INLINE and LINKER modes are mutually exclusive, at most one of the two macros may be defined
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
index a96bc91d5b..e54e839724 100644
--- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
@@ -62,7 +62,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005346059799194336 [0m
+[1;32mDEBUG: model prefixing  takes 0.005738258361816406 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -177,13 +177,13 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s
+Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 4 routines in  0.264 s
+ALOHA: aloha creates 4 routines in  0.278 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -194,6 +194,8 @@ ALOHA: aloha creates 4 routines in  0.264 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h
@@ -202,7 +204,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
 quit
 
-real	0m0.647s
-user	0m0.592s
-sys	0m0.048s
+real	0m0.686s
+user	0m0.624s
+sys	0m0.053s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc
index f635c67b11..7835cfcc44 100644
--- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc
+++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,10 +332,10 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 );
 
-      FFV1P0_3<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[0], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[4] );
+      helas_FFV1P0_3( w_fp[1], w_fp[0], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -340,10 +344,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 2 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV2_4_3<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[0], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV2_4_3( w_fp[1], w_fp[0], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, &amp_fp[0] );
+      helas_FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc
+++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L) # HELINL=L: HelAmps are compiled as separate object files and resolved by the linker
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h
index 74d9a80c1a..a392267d1b 100644
--- a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h
@@ -1207,8 +1207,128 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_FFV2_0 FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV2_3 FFV2_3<W_ACCESS, CD_ACCESS>
+#define helas_FFV4_0 FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV4_3 FFV4_3<W_ACCESS, CD_ACCESS>
+#define helas_FFV2_4_0 FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS> // CI_ACCESS as in the template call this macro replaces (couplings taken from COUPs[ndcoup + i])
+#define helas_FFV2_4_3 FFV2_4_3<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_FFV2_0 linker_FFV2_0
+#define helas_FFV2_3 linker_FFV2_3
+#define helas_FFV4_0 linker_FFV4_0
+#define helas_FFV4_3 linker_FFV4_3
+#define helas_FFV2_4_0 linker_FFV2_4_0
+#define helas_FFV2_4_3 linker_FFV2_4_3
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV2_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV2_3( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV4_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV4_3( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV2_4_0( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[], // NOTE(review): the helas_FFV2_4_0 call passes two (coupling, coefficient) pairs but only one is declared here - confirm against the definition
+                   const double Ccoeff,
+                   fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV2_4_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/ee_mumu.sa/src/cudacpp_config.mk b/epochX/cudacpp/ee_mumu.sa/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/ee_mumu.sa/src/cudacpp_config.mk
+++ b/epochX/cudacpp/ee_mumu.sa/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuConfig.h
index ad528bf8f3..5d79a575e7 100644
--- a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
index ce00b9360a..a1c9fae589 100644
--- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005608797073364258 [0m
+[1;32mDEBUG: model prefixing  takes 0.005675554275512695 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.009 s
+1 processes with 3 diagrams generated in 0.008 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
 [1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f72ca441dc0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fdad2083e80> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -206,7 +206,7 @@ ALOHA: aloha creates 2 routines in  0.150 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.137 s
+ALOHA: aloha creates 4 routines in  0.136 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -243,9 +243,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m1.991s
-user	0m1.720s
-sys	0m0.272s
+real	0m1.975s
+user	0m1.709s
+sys	0m0.264s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
index b84f753a35..3d103ae0db 100644
--- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005595207214355469 [0m
+[1;32mDEBUG: model prefixing  takes 0.005746364593505859 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -182,13 +182,15 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.144 s
+ALOHA: aloha creates 2 routines in  0.152 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h
@@ -197,7 +199,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
 quit
 
-real	0m0.556s
-user	0m0.475s
-sys	0m0.048s
-Code generation completed in 1 seconds
+real	0m0.560s
+user	0m0.499s
+sys	0m0.055s
+Code generation completed in 0 seconds
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gg_tt.sa/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gg_tt.sa/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc
index 20589146fb..cda3e64ada 100644
--- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -341,10 +345,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -353,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc
+++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h
index 1c2d0cd26a..574dd3755c 100644
--- a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h
@@ -1032,8 +1032,75 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/gg_tt.sa/src/cudacpp_config.mk b/epochX/cudacpp/gg_tt.sa/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gg_tt.sa/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gg_tt.sa/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuConfig.h
index ad528bf8f3..5d79a575e7 100644
--- a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
index 7fabd11d28..6f11cac977 100644
--- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
+++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005646228790283203 [0m
+[1;32mDEBUG: model prefixing  takes 0.005694866180419922 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.009 s
+1 processes with 3 diagrams generated in 0.008 s
 Total: 1 processes with 3 diagrams
 add process g g > t t~ g
 INFO: Checking for minimal orders which gives processes. 
@@ -163,7 +163,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.019 s
+1 processes with 16 diagrams generated in 0.020 s
 Total: 2 processes with 19 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -187,8 +187,8 @@ INFO: Processing color information for process: g g > t t~ g @2
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P2_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2f6d99dc70> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fad7ad3f970> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -204,12 +204,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2f6d99dc70> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fad7ad3fac0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -225,25 +225,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1545][0m [0m
-Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s
-Wrote files for 46 helas calls in 0.275 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1614][0m [0m
+Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s
+Wrote files for 46 helas calls in 0.281 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.331 s
+ALOHA: aloha creates 5 routines in  0.339 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.315 s
+ALOHA: aloha creates 10 routines in  0.320 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -255,6 +255,8 @@ ALOHA: aloha creates 10 routines in  0.315 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h
@@ -280,10 +282,7 @@ DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-Hunk #2 succeeded at 159 (offset 16 lines).
-Hunk #3 succeeded at 237 (offset 16 lines).
-Hunk #4 succeeded at 265 (offset 16 lines).
-Hunk #5 succeeded at 310 (offset 16 lines).
+Hunk #2 succeeded at 236 (offset 16 lines).
 [1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done.
 Type "launch" to generate events from this process, or see
@@ -291,9 +290,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.676s
-user	0m2.362s
-sys	0m0.310s
+real	0m2.743s
+user	0m2.420s
+sys	0m0.321s
 Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
index 45fb7860e9..576ea7cb4d 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -342,10 +346,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -355,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc
index b4f10898b0..3714bf4dce 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -330,11 +334,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -347,10 +351,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 16 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -361,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 16 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -375,11 +379,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 16 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -389,10 +393,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 16 ***
 
       // Wavefunction(s) for diagram number 5
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -406,7 +410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -416,11 +420,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 16 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -433,7 +437,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -447,7 +451,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -457,10 +461,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 16 ***
 
       // Wavefunction(s) for diagram number 10
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -474,7 +478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -488,7 +492,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -504,7 +508,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -517,7 +521,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -530,7 +534,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -543,22 +547,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 16 ***
 
       // Wavefunction(s) for diagram number 16
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h
index 5742cd4648..5efae129bb 100644
--- a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h
@@ -1271,8 +1271,147 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1_0 linker_VVV1_0
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVVV1P0_1 linker_VVVV1P0_1
+#define helas_VVVV3P0_1 linker_VVVV3P0_1
+#define helas_VVVV4P0_1 linker_VVVV4P0_1
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_config.mk b/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or defined MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
index 18b1d80415..2635bfe901 100644
--- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005260467529296875 [0m
+[1;32mDEBUG: model prefixing  takes 0.005773782730102539 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.021 s
+1 processes with 16 diagrams generated in 0.022 s
 Total: 1 processes with 16 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Processing color information for process: g g > t t~ g @1 
 INFO: Creating files in directory P1_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f1716271c70> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f789b931b50> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -194,25 +194,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1545][0m [0m
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s
-Wrote files for 36 helas calls in 0.162 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1614][0m [0m
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s
+Wrote files for 36 helas calls in 0.171 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.322 s
+ALOHA: aloha creates 5 routines in  0.338 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.308 s
+ALOHA: aloha creates 10 routines in  0.322 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -224,6 +224,8 @@ ALOHA: aloha creates 10 routines in  0.308 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h
@@ -245,10 +247,7 @@ DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-Hunk #2 succeeded at 159 (offset 16 lines).
-Hunk #3 succeeded at 237 (offset 16 lines).
-Hunk #4 succeeded at 265 (offset 16 lines).
-Hunk #5 succeeded at 310 (offset 16 lines).
+Hunk #2 succeeded at 236 (offset 16 lines).
 [1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done.
 Type "launch" to generate events from this process, or see
@@ -256,9 +255,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.483s
-user	0m2.197s
-sys	0m0.283s
+real	0m2.549s
+user	0m2.267s
+sys	0m0.281s
 Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
index ed7203959e..3fa4e019da 100644
--- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -330,11 +334,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -347,10 +351,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 16 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -361,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 16 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -375,11 +379,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 16 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -389,10 +393,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 16 ***
 
       // Wavefunction(s) for diagram number 5
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -406,7 +410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -416,11 +420,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 16 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -433,7 +437,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -447,7 +451,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -457,10 +461,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 16 ***
 
       // Wavefunction(s) for diagram number 10
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -474,7 +478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -488,7 +492,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -504,7 +508,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -517,7 +521,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -530,7 +534,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -543,22 +547,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 16 ***
 
       // Wavefunction(s) for diagram number 16
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h
index 5742cd4648..5efae129bb 100644
--- a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h
@@ -1271,8 +1271,147 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1_0 linker_VVV1_0
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVVV1P0_1 linker_VVVV1P0_1
+#define helas_VVVV3P0_1 linker_VVVV3P0_1
+#define helas_VVVV4P0_1 linker_VVVV4P0_1
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/gg_ttg.mad/src/cudacpp_config.mk b/epochX/cudacpp/gg_ttg.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gg_ttg.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gg_ttg.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not use separate object files, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
index a103152d0f..a38fcc455a 100644
--- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.00570988655090332 [0m
+[1;32mDEBUG: model prefixing  takes 0.005728244781494141 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.021 s
+1 processes with 16 diagrams generated in 0.022 s
 Total: 1 processes with 16 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg
 Load PLUGIN.CUDACPP_OUTPUT
@@ -178,14 +178,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.323 s
+ALOHA: aloha creates 5 routines in  0.334 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -197,6 +197,8 @@ ALOHA: aloha creates 5 routines in  0.323 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h
@@ -205,7 +207,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
 quit
 
-real	0m0.774s
-user	0m0.711s
-sys	0m0.055s
+real	0m0.840s
+user	0m0.736s
+sys	0m0.067s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc
index b290333757..23621b7b68 100644
--- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -330,11 +334,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -346,10 +350,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 16 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -359,10 +363,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 16 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -372,11 +376,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 16 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -385,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 16 ***
 
       // Wavefunction(s) for diagram number 5
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -401,7 +405,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -410,11 +414,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 16 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -426,7 +430,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -439,7 +443,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -448,10 +452,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 16 ***
 
       // Wavefunction(s) for diagram number 10
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -464,7 +468,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -477,7 +481,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -492,7 +496,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -504,7 +508,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -516,7 +520,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -528,12 +532,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 16 ***
 
       // Wavefunction(s) for diagram number 16
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -541,7 +545,7 @@ namespace mg5amcCpu
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -549,7 +553,7 @@ namespace mg5amcCpu
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc
+++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h
index 5742cd4648..5efae129bb 100644
--- a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h
@@ -1271,8 +1271,147 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1_0 linker_VVV1_0
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVVV1P0_1 linker_VVVV1P0_1
+#define helas_VVVV3P0_1 linker_VVVV3P0_1
+#define helas_VVVV4P0_1 linker_VVVV4P0_1
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/gg_ttg.sa/src/cudacpp_config.mk b/epochX/cudacpp/gg_ttg.sa/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gg_ttg.sa/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gg_ttg.sa/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuConfig.h
index ad528bf8f3..5d79a575e7 100644
--- a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
index 816c1d75f7..e3adcfcb3c 100644
--- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0055654048919677734 [0m
+[1;32mDEBUG: model prefixing  takes 0.005609273910522461 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.156 s
+1 processes with 123 diagrams generated in 0.165 s
 Total: 1 processes with 123 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ g g @1 
 INFO: Creating files in directory P1_gg_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fce7b612ca0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f5273b8fe50> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -194,25 +194,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1545][0m [0m
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.428 s
-Wrote files for 222 helas calls in 0.706 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1614][0m [0m
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s
+Wrote files for 222 helas calls in 0.733 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.333 s
+ALOHA: aloha creates 5 routines in  0.352 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.317 s
+ALOHA: aloha creates 10 routines in  0.330 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -227,6 +227,8 @@ ALOHA: aloha creates 10 routines in  0.317 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h
@@ -248,10 +250,7 @@ DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-Hunk #2 succeeded at 191 (offset 48 lines).
-Hunk #3 succeeded at 269 (offset 48 lines).
-Hunk #4 succeeded at 297 (offset 48 lines).
-Hunk #5 succeeded at 342 (offset 48 lines).
+Hunk #2 succeeded at 268 (offset 48 lines).
 [1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done.
 Type "launch" to generate events from this process, or see
@@ -259,9 +258,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.822s
-user	0m3.543s
-sys	0m0.260s
+real	0m4.003s
+user	0m3.676s
+sys	0m0.300s
 Code generation completed in 4 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc
index 60c12d561b..2a065101ff 100644
--- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -332,11 +336,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 1
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -345,7 +349,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -354,7 +358,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -367,10 +371,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 123 ***
 
       // Wavefunction(s) for diagram number 2
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
+      helas_VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -387,10 +391,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 123 ***
 
       // Wavefunction(s) for diagram number 3
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -407,10 +411,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 123 ***
 
       // Wavefunction(s) for diagram number 4
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 4
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -427,11 +431,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 123 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -445,7 +449,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -458,10 +462,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 123 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -472,10 +476,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 123 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -489,7 +493,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -502,10 +506,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 123 ***
 
       // Wavefunction(s) for diagram number 10
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -516,10 +520,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 123 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -533,7 +537,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -549,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -563,7 +567,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -579,7 +583,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -595,7 +599,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -608,12 +612,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 123 ***
 
       // Wavefunction(s) for diagram number 17
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 17
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -623,10 +627,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 18 OF 123 ***
 
       // Wavefunction(s) for diagram number 18
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 18
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -639,7 +643,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 19
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -650,11 +654,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 20 OF 123 ***
 
       // Wavefunction(s) for diagram number 20
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
+      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 20
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -670,7 +674,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 21
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -684,7 +688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -695,10 +699,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 23 OF 123 ***
 
       // Wavefunction(s) for diagram number 23
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
+      helas_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
 
       // Amplitude(s) for diagram number 23
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -714,7 +718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 24
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -728,7 +732,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -739,10 +743,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 123 ***
 
       // Wavefunction(s) for diagram number 26
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
+      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -755,7 +759,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -768,7 +772,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 28
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -781,7 +785,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -794,7 +798,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -808,7 +812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 31
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -821,22 +825,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 123 ***
 
       // Wavefunction(s) for diagram number 32
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
+      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
+      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
+      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -845,12 +849,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 33 OF 123 ***
 
       // Wavefunction(s) for diagram number 33
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -860,10 +864,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 34 OF 123 ***
 
       // Wavefunction(s) for diagram number 34
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 34
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -876,7 +880,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -887,10 +891,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 36 OF 123 ***
 
       // Wavefunction(s) for diagram number 36
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
+      helas_FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 36
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -906,7 +910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 37
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -920,7 +924,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -934,7 +938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -950,7 +954,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -964,7 +968,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 41
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -975,10 +979,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 42 OF 123 ***
 
       // Wavefunction(s) for diagram number 42
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 42
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -991,7 +995,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 43
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1004,7 +1008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 44
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1017,7 +1021,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1030,7 +1034,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1044,7 +1048,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 47
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1060,17 +1064,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -1079,11 +1083,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 49 OF 123 ***
 
       // Wavefunction(s) for diagram number 49
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 49
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1094,10 +1098,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 50 OF 123 ***
 
       // Wavefunction(s) for diagram number 50
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 50
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1113,7 +1117,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 51
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1124,10 +1128,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 52 OF 123 ***
 
       // Wavefunction(s) for diagram number 52
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 52
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1141,7 +1145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1157,7 +1161,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 54
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1171,7 +1175,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 55
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1187,7 +1191,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 56
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1203,7 +1207,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1223,7 +1227,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 58
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1232,7 +1236,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1241,7 +1245,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1254,10 +1258,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 123 ***
 
       // Wavefunction(s) for diagram number 59
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 59
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1277,7 +1281,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 60
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1297,7 +1301,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1313,7 +1317,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1327,7 +1331,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 63
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1343,7 +1347,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1354,11 +1358,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 65 OF 123 ***
 
       // Wavefunction(s) for diagram number 65
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 65
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1369,10 +1373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 66 OF 123 ***
 
       // Wavefunction(s) for diagram number 66
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 66
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1388,7 +1392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1399,10 +1403,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 68 OF 123 ***
 
       // Wavefunction(s) for diagram number 68
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 68
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1416,7 +1420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1432,7 +1436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1446,7 +1450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1462,7 +1466,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1478,7 +1482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 73
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1498,7 +1502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 74
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1507,7 +1511,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1516,7 +1520,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1529,10 +1533,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 75 OF 123 ***
 
       // Wavefunction(s) for diagram number 75
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 75
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1552,7 +1556,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 76
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1572,7 +1576,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 77
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1588,7 +1592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 78
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1602,7 +1606,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 79
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1618,7 +1622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 80
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 80 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1629,10 +1633,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 81 OF 123 ***
 
       // Wavefunction(s) for diagram number 81
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 81
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1642,10 +1646,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 82 OF 123 ***
 
       // Wavefunction(s) for diagram number 82
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 82
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1658,7 +1662,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 83
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1668,10 +1672,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 84 OF 123 ***
 
       // Wavefunction(s) for diagram number 84
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 84
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1684,7 +1688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 85
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1695,10 +1699,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 86 OF 123 ***
 
       // Wavefunction(s) for diagram number 86
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 86
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1711,10 +1715,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 87 OF 123 ***
 
       // Wavefunction(s) for diagram number 87
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 87
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1724,10 +1728,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 88 OF 123 ***
 
       // Wavefunction(s) for diagram number 88
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 88
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1740,7 +1744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 89
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1750,10 +1754,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 90 OF 123 ***
 
       // Wavefunction(s) for diagram number 90
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
+      helas_FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
 
       // Amplitude(s) for diagram number 90
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 90 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1766,7 +1770,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 91
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1780,7 +1784,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 92
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1796,7 +1800,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 93
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1805,7 +1809,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1814,7 +1818,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1827,10 +1831,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 94 OF 123 ***
 
       // Wavefunction(s) for diagram number 94
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 94
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1847,10 +1851,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 95 OF 123 ***
 
       // Wavefunction(s) for diagram number 95
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 95
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1870,7 +1874,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 96
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1886,7 +1890,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 97
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1900,7 +1904,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 98
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1916,7 +1920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 99
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1930,7 +1934,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 100
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1939,7 +1943,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1948,7 +1952,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1961,10 +1965,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 101 OF 123 ***
 
       // Wavefunction(s) for diagram number 101
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 101
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1984,7 +1988,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 102
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2004,7 +2008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 103
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2020,7 +2024,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 104
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2034,7 +2038,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 105
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2050,7 +2054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 106
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2064,7 +2068,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 107
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2073,7 +2077,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2082,7 +2086,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2098,7 +2102,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 108
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2118,7 +2122,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 109
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2138,7 +2142,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 110
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2151,7 +2155,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 111
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2164,7 +2168,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 112
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2177,7 +2181,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 113
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2187,12 +2191,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 114 OF 123 ***
 
       // Wavefunction(s) for diagram number 114
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 114
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2201,7 +2205,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2210,7 +2214,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2226,17 +2230,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 115
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -2248,17 +2252,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 116
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -2267,12 +2271,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 117 OF 123 ***
 
       // Wavefunction(s) for diagram number 117
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 117
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2281,7 +2285,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2290,7 +2294,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2306,17 +2310,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 118
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -2328,17 +2332,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 119
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -2347,22 +2351,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 120 OF 123 ***
 
       // Wavefunction(s) for diagram number 120
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
 
       // Amplitude(s) for diagram number 120
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -2374,17 +2378,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 121
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -2396,7 +2400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 122
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2405,7 +2409,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2414,7 +2418,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2430,7 +2434,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 123
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2439,7 +2443,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2448,7 +2452,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc
+++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h
index bcf4333c78..24e8114e3a 100644
--- a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h
@@ -1400,8 +1400,189 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1_0 linker_VVV1_0
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVVV1_0 linker_VVVV1_0
+#define helas_VVVV1P0_1 linker_VVVV1P0_1
+#define helas_VVVV3_0 linker_VVVV3_0
+#define helas_VVVV3P0_1 linker_VVVV3P0_1
+#define helas_VVVV4_0 linker_VVVV4_0
+#define helas_VVVV4P0_1 linker_VVVV4P0_1
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_config.mk b/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not build HelAmps as separate object files, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
index 5c8b6b0535..1b8b96727c 100644
--- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0053234100341796875 [0m
+[1;32mDEBUG: model prefixing  takes 0.005701541900634766 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.157 s
+1 processes with 123 diagrams generated in 0.164 s
 Total: 1 processes with 123 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg
 Load PLUGIN.CUDACPP_OUTPUT
@@ -178,14 +178,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.430 s
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.322 s
+ALOHA: aloha creates 5 routines in  0.329 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -200,6 +200,8 @@ ALOHA: aloha creates 5 routines in  0.322 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h
@@ -209,6 +211,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg
 quit
 
 real	0m1.496s
-user	0m1.376s
-sys	0m0.058s
-Code generation completed in 1 seconds
+user	0m1.443s
+sys	0m0.044s
+Code generation completed in 2 seconds
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc
index a7596b22e4..a7cc5471b8 100644
--- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -332,11 +336,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 1
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -348,7 +352,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -360,7 +364,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -376,10 +380,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 123 ***
 
       // Wavefunction(s) for diagram number 2
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
+      helas_VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -395,10 +399,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 123 ***
 
       // Wavefunction(s) for diagram number 3
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -414,10 +418,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 123 ***
 
       // Wavefunction(s) for diagram number 4
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 4
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -433,11 +437,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 123 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -450,7 +454,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -462,10 +466,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 123 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -475,10 +479,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 123 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -491,7 +495,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -503,10 +507,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 123 ***
 
       // Wavefunction(s) for diagram number 10
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -516,10 +520,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 123 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -532,7 +536,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -547,7 +551,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -560,7 +564,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -575,7 +579,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -590,7 +594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -602,12 +606,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 123 ***
 
       // Wavefunction(s) for diagram number 17
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 17
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -616,10 +620,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 18 OF 123 ***
 
       // Wavefunction(s) for diagram number 18
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 18
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -631,7 +635,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 19
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -641,11 +645,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 20 OF 123 ***
 
       // Wavefunction(s) for diagram number 20
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
+      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 20
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -660,7 +664,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 21
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -673,7 +677,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -683,10 +687,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 23 OF 123 ***
 
       // Wavefunction(s) for diagram number 23
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
+      helas_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
 
       // Amplitude(s) for diagram number 23
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -701,7 +705,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 24
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -714,7 +718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -724,10 +728,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 123 ***
 
       // Wavefunction(s) for diagram number 26
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
+      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -739,7 +743,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -751,7 +755,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 28
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -763,7 +767,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -775,7 +779,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -788,7 +792,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 31
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -800,12 +804,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 123 ***
 
       // Wavefunction(s) for diagram number 32
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
+      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
+      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
+      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -813,7 +817,7 @@ namespace mg5amcCpu
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -821,7 +825,7 @@ namespace mg5amcCpu
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -833,12 +837,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 33 OF 123 ***
 
       // Wavefunction(s) for diagram number 33
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -847,10 +851,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 34 OF 123 ***
 
       // Wavefunction(s) for diagram number 34
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 34
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -862,7 +866,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -872,10 +876,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 36 OF 123 ***
 
       // Wavefunction(s) for diagram number 36
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
+      helas_FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 36
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -890,7 +894,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 37
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -903,7 +907,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -916,7 +920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -931,7 +935,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -944,7 +948,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 41
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -954,10 +958,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 42 OF 123 ***
 
       // Wavefunction(s) for diagram number 42
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 42
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -969,7 +973,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 43
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -981,7 +985,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 44
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -993,7 +997,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1005,7 +1009,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1018,7 +1022,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 47
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1033,7 +1037,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1041,7 +1045,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1049,7 +1053,7 @@ namespace mg5amcCpu
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1061,11 +1065,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 49 OF 123 ***
 
       // Wavefunction(s) for diagram number 49
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 49
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1075,10 +1079,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 50 OF 123 ***
 
       // Wavefunction(s) for diagram number 50
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 50
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1093,7 +1097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 51
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1103,10 +1107,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 52 OF 123 ***
 
       // Wavefunction(s) for diagram number 52
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 52
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1119,7 +1123,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1134,7 +1138,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 54
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1147,7 +1151,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 55
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1162,7 +1166,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 56
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1177,7 +1181,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1196,7 +1200,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 58
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1208,7 +1212,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1220,7 +1224,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1236,10 +1240,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 123 ***
 
       // Wavefunction(s) for diagram number 59
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 59
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1258,7 +1262,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 60
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1277,7 +1281,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1292,7 +1296,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1305,7 +1309,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 63
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1320,7 +1324,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1330,11 +1334,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 65 OF 123 ***
 
       // Wavefunction(s) for diagram number 65
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 65
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1344,10 +1348,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 66 OF 123 ***
 
       // Wavefunction(s) for diagram number 66
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 66
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1362,7 +1366,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1372,10 +1376,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 68 OF 123 ***
 
       // Wavefunction(s) for diagram number 68
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 68
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1388,7 +1392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1403,7 +1407,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1416,7 +1420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1431,7 +1435,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1446,7 +1450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 73
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1465,7 +1469,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 74
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1477,7 +1481,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1489,7 +1493,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1505,10 +1509,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 75 OF 123 ***
 
       // Wavefunction(s) for diagram number 75
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 75
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1527,7 +1531,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 76
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1546,7 +1550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 77
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1561,7 +1565,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 78
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1574,7 +1578,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 79
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1589,7 +1593,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 80
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1599,10 +1603,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 81 OF 123 ***
 
       // Wavefunction(s) for diagram number 81
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 81
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1611,10 +1615,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 82 OF 123 ***
 
       // Wavefunction(s) for diagram number 82
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 82
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1626,7 +1630,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 83
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1635,10 +1639,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 84 OF 123 ***
 
       // Wavefunction(s) for diagram number 84
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 84
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1650,7 +1654,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 85
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1660,10 +1664,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 86 OF 123 ***
 
       // Wavefunction(s) for diagram number 86
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 86
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1675,10 +1679,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 87 OF 123 ***
 
       // Wavefunction(s) for diagram number 87
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 87
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1687,10 +1691,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 88 OF 123 ***
 
       // Wavefunction(s) for diagram number 88
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 88
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1702,7 +1706,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 89
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1711,10 +1715,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 90 OF 123 ***
 
       // Wavefunction(s) for diagram number 90
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
+      helas_FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
 
       // Amplitude(s) for diagram number 90
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1726,7 +1730,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 91
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1739,7 +1743,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 92
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1754,7 +1758,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 93
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1766,7 +1770,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1778,7 +1782,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1794,10 +1798,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 94 OF 123 ***
 
       // Wavefunction(s) for diagram number 94
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 94
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1813,10 +1817,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 95 OF 123 ***
 
       // Wavefunction(s) for diagram number 95
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 95
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1835,7 +1839,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 96
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1850,7 +1854,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 97
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1863,7 +1867,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 98
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1878,7 +1882,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 99
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1891,7 +1895,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 100
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1903,7 +1907,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1915,7 +1919,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1931,10 +1935,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 101 OF 123 ***
 
       // Wavefunction(s) for diagram number 101
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 101
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1953,7 +1957,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 102
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1972,7 +1976,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 103
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1987,7 +1991,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 104
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2000,7 +2004,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 105
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2015,7 +2019,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 106
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2028,7 +2032,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 107
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2040,7 +2044,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2052,7 +2056,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2071,7 +2075,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 108
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2090,7 +2094,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 109
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2109,7 +2113,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 110
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2121,7 +2125,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 111
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2133,7 +2137,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 112
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2145,7 +2149,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 113
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2154,12 +2158,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 114 OF 123 ***
 
       // Wavefunction(s) for diagram number 114
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 114
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2171,7 +2175,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2183,7 +2187,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2202,7 +2206,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 115
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2210,7 +2214,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2218,7 +2222,7 @@ namespace mg5amcCpu
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2233,7 +2237,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 116
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2241,7 +2245,7 @@ namespace mg5amcCpu
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2249,7 +2253,7 @@ namespace mg5amcCpu
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2261,12 +2265,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 117 OF 123 ***
 
       // Wavefunction(s) for diagram number 117
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 117
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2278,7 +2282,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2290,7 +2294,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2309,7 +2313,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 118
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2317,7 +2321,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2325,7 +2329,7 @@ namespace mg5amcCpu
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2340,7 +2344,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 119
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2348,7 +2352,7 @@ namespace mg5amcCpu
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2356,7 +2360,7 @@ namespace mg5amcCpu
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2368,12 +2372,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 120 OF 123 ***
 
       // Wavefunction(s) for diagram number 120
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
 
       // Amplitude(s) for diagram number 120
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2381,7 +2385,7 @@ namespace mg5amcCpu
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2389,7 +2393,7 @@ namespace mg5amcCpu
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2404,7 +2408,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 121
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2412,7 +2416,7 @@ namespace mg5amcCpu
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2420,7 +2424,7 @@ namespace mg5amcCpu
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2435,7 +2439,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 122
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2447,7 +2451,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2459,7 +2463,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2478,7 +2482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 123
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2490,7 +2494,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2502,7 +2506,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc
+++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h
index bcf4333c78..24e8114e3a 100644
--- a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h
@@ -1400,8 +1400,189 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1_0 linker_VVV1_0
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVVV1_0 linker_VVVV1_0
+#define helas_VVVV1P0_1 linker_VVVV1P0_1
+#define helas_VVVV3_0 linker_VVVV3_0
+#define helas_VVVV3P0_1 linker_VVVV3P0_1
+#define helas_VVVV4_0 linker_VVVV4_0
+#define helas_VVVV4P0_1 linker_VVVV4P0_1
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_config.mk b/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuConfig.h
index ad528bf8f3..5d79a575e7 100644
--- a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
index cf81051351..9668c060d9 100644
--- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005418062210083008 [0m
+[1;32mDEBUG: model prefixing  takes 0.005650043487548828 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.889 s
+1 processes with 1240 diagrams generated in 1.961 s
 Total: 1 processes with 1240 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -179,8 +179,8 @@ INFO: Processing color information for process: g g > t t~ g g g @1
 INFO: Creating files in directory P1_gg_ttxggg 
 INFO: Computing Color-Flow optimization [15120 term] 
 INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f22d74c8b50> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f628bf6bc10> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -196,25 +196,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1545][0m [0m
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.527 s
-Wrote files for 2281 helas calls in 18.453 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1614][0m [0m
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.828 s
+Wrote files for 2281 helas calls in 19.178 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.318 s
+ALOHA: aloha creates 5 routines in  0.334 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.355 s
+ALOHA: aloha creates 10 routines in  0.373 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -229,6 +229,8 @@ ALOHA: aloha creates 10 routines in  0.355 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h
@@ -250,10 +252,7 @@ DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-Hunk #2 succeeded at 255 (offset 112 lines).
-Hunk #3 succeeded at 333 (offset 112 lines).
-Hunk #4 succeeded at 361 (offset 112 lines).
-Hunk #5 succeeded at 406 (offset 112 lines).
+Hunk #2 succeeded at 332 (offset 112 lines).
 [1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done.
 Type "launch" to generate events from this process, or see
@@ -261,10 +260,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m32.580s
-user	0m32.015s
-sys	0m0.455s
-Code generation completed in 33 seconds
+real	0m33.910s
+user	0m33.348s
+sys	0m0.451s
+Code generation completed in 34 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc
index 9b85b0fbf9..30587548a9 100644
--- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -334,13 +338,13 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][6], +1, w_fp[6], 6 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 1
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,10 +369,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 1240 ***
 
       // Wavefunction(s) for diagram number 2
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 2
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -396,7 +400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 3
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -413,7 +417,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -430,7 +434,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -451,11 +455,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 1240 ***
 
       // Wavefunction(s) for diagram number 4
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] );
+      helas_VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] );
 
       // Amplitude(s) for diagram number 4
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -483,7 +487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -511,7 +515,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -528,7 +532,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -545,7 +549,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -566,10 +570,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 1240 ***
 
       // Wavefunction(s) for diagram number 7
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] );
+      helas_VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 7
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -597,7 +601,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -625,7 +629,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -642,7 +646,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -659,7 +663,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -680,12 +684,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 1240 ***
 
       // Wavefunction(s) for diagram number 10
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
+      helas_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
+      helas_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] );
+      helas_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 10
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -702,7 +706,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -719,7 +723,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -740,12 +744,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 1240 ***
 
       // Wavefunction(s) for diagram number 11
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] );
+      helas_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] );
+      helas_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] );
+      helas_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 11
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -762,7 +766,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -779,7 +783,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -800,12 +804,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 1240 ***
 
       // Wavefunction(s) for diagram number 12
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
+      helas_VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 12
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -822,7 +826,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -839,7 +843,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -860,10 +864,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 1240 ***
 
       // Wavefunction(s) for diagram number 13
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 13
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
@@ -880,7 +884,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -897,7 +901,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
@@ -918,10 +922,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 1240 ***
 
       // Wavefunction(s) for diagram number 14
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 14
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -946,10 +950,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 15 OF 1240 ***
 
       // Wavefunction(s) for diagram number 15
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] );
+      helas_VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] );
 
       // Amplitude(s) for diagram number 15
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -977,7 +981,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1002,10 +1006,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 1240 ***
 
       // Wavefunction(s) for diagram number 17
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] );
 
       // Amplitude(s) for diagram number 17
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
@@ -1022,7 +1026,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -1039,7 +1043,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
@@ -1063,7 +1067,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1088,10 +1092,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 1240 ***
 
       // Wavefunction(s) for diagram number 19
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] );
+      helas_VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] );
 
       // Amplitude(s) for diagram number 19
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1119,7 +1123,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1144,10 +1148,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 1240 ***
 
       // Wavefunction(s) for diagram number 21
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
 
       // Amplitude(s) for diagram number 21
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -1164,7 +1168,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -1181,7 +1185,7 @@ namespace mg5amcCpu
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -1205,7 +1209,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1233,7 +1237,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1258,10 +1262,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 1240 ***
 
       // Wavefunction(s) for diagram number 24
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 24
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1286,12 +1290,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 25 OF 1240 ***
 
       // Wavefunction(s) for diagram number 25
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] );
+      helas_VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] );
+      helas_VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] );
+      helas_VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] );
 
       // Amplitude(s) for diagram number 25
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -1308,7 +1312,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -1325,7 +1329,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -1346,12 +1350,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 1240 ***
 
       // Wavefunction(s) for diagram number 26
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] );
+      helas_FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] );
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1362,10 +1366,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 1240 ***
 
       // Wavefunction(s) for diagram number 27
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] );
+      helas_FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] );
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1376,10 +1380,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 1240 ***
 
       // Wavefunction(s) for diagram number 28
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] );
+      helas_FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] );
 
       // Amplitude(s) for diagram number 28
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1399,7 +1403,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1415,7 +1419,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1435,7 +1439,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 31
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1451,7 +1455,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1460,7 +1464,7 @@ namespace mg5amcCpu
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1469,7 +1473,7 @@ namespace mg5amcCpu
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1482,11 +1486,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 33 OF 1240 ***
 
       // Wavefunction(s) for diagram number 33
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
+      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] );
+      helas_FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1497,10 +1501,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 34 OF 1240 ***
 
       // Wavefunction(s) for diagram number 34
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
+      helas_FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
 
       // Amplitude(s) for diagram number 34
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1514,7 +1518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1527,10 +1531,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 36 OF 1240 ***
 
       // Wavefunction(s) for diagram number 36
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] );
 
       // Amplitude(s) for diagram number 36
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1541,10 +1545,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 37 OF 1240 ***
 
       // Wavefunction(s) for diagram number 37
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] );
+      helas_FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] );
 
       // Amplitude(s) for diagram number 37
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1558,7 +1562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1574,7 +1578,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1590,7 +1594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1606,7 +1610,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 41
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1623,11 +1627,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 42 OF 1240 ***
 
       // Wavefunction(s) for diagram number 42
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] );
+      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
+      helas_FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] );
 
       // Amplitude(s) for diagram number 42
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1638,10 +1642,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 43 OF 1240 ***
 
       // Wavefunction(s) for diagram number 43
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] );
+      helas_FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] );
 
       // Amplitude(s) for diagram number 43
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1652,10 +1656,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 44 OF 1240 ***
 
       // Wavefunction(s) for diagram number 44
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] );
+      helas_FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] );
 
       // Amplitude(s) for diagram number 44
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1675,7 +1679,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1691,7 +1695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1711,7 +1715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 47
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1727,7 +1731,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1736,7 +1740,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1745,7 +1749,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1758,11 +1762,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 49 OF 1240 ***
 
       // Wavefunction(s) for diagram number 49
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] );
+      helas_FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
 
       // Amplitude(s) for diagram number 49
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1773,10 +1777,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 50 OF 1240 ***
 
       // Wavefunction(s) for diagram number 50
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
+      helas_FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
 
       // Amplitude(s) for diagram number 50
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1790,7 +1794,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 51
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1806,7 +1810,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 52
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1820,7 +1824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1834,7 +1838,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 54
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1850,7 +1854,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 55
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1866,7 +1870,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 56
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1882,7 +1886,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1899,11 +1903,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 58 OF 1240 ***
 
       // Wavefunction(s) for diagram number 58
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
+      helas_FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] );
 
       // Amplitude(s) for diagram number 58
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 58 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1914,10 +1918,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 1240 ***
 
       // Wavefunction(s) for diagram number 59
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] );
+      helas_FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] );
 
       // Amplitude(s) for diagram number 59
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1928,10 +1932,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 60 OF 1240 ***
 
       // Wavefunction(s) for diagram number 60
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] );
+      helas_FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] );
 
       // Amplitude(s) for diagram number 60
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1951,7 +1955,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1967,7 +1971,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1987,7 +1991,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 63
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2003,7 +2007,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2012,7 +2016,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2021,7 +2025,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2034,10 +2038,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 65 OF 1240 ***
 
       // Wavefunction(s) for diagram number 65
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
 
       // Amplitude(s) for diagram number 65
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2051,7 +2055,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 66
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2065,7 +2069,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2081,7 +2085,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 68
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2095,7 +2099,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2109,7 +2113,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2125,7 +2129,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2141,7 +2145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2157,7 +2161,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 73
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2174,11 +2178,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 74 OF 1240 ***
 
       // Wavefunction(s) for diagram number 74
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 74
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 74 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2189,10 +2193,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 75 OF 1240 ***
 
       // Wavefunction(s) for diagram number 75
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] );
+      helas_FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] );
 
       // Amplitude(s) for diagram number 75
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2203,10 +2207,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 76 OF 1240 ***
 
       // Wavefunction(s) for diagram number 76
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] );
+      helas_FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] );
 
       // Amplitude(s) for diagram number 76
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2226,7 +2230,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 77
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2242,7 +2246,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 78
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2262,7 +2266,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 79
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2278,7 +2282,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 80
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2287,7 +2291,7 @@ namespace mg5amcCpu
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2296,7 +2300,7 @@ namespace mg5amcCpu
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2312,7 +2316,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 81
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2328,7 +2332,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 82
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2344,7 +2348,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 83
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2361,10 +2365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 84 OF 1240 ***
 
       // Wavefunction(s) for diagram number 84
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] );
+      helas_FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] );
 
       // Amplitude(s) for diagram number 84
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2375,10 +2379,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 85 OF 1240 ***
 
       // Wavefunction(s) for diagram number 85
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
+      helas_FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
 
       // Amplitude(s) for diagram number 85
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2389,10 +2393,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 86 OF 1240 ***
 
       // Wavefunction(s) for diagram number 86
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] );
+      helas_FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 86
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2412,7 +2416,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 87
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2428,7 +2432,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 88
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2448,7 +2452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 89
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2464,7 +2468,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 90
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2473,7 +2477,7 @@ namespace mg5amcCpu
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2482,7 +2486,7 @@ namespace mg5amcCpu
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2498,7 +2502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 91
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2514,7 +2518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 92
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2530,7 +2534,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 93
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 93 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2547,10 +2551,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 94 OF 1240 ***
 
       // Wavefunction(s) for diagram number 94
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] );
+      helas_FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] );
 
       // Amplitude(s) for diagram number 94
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2561,10 +2565,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 95 OF 1240 ***
 
       // Wavefunction(s) for diagram number 95
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
+      helas_FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
 
       // Amplitude(s) for diagram number 95
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2575,10 +2579,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 96 OF 1240 ***
 
       // Wavefunction(s) for diagram number 96
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] );
+      helas_FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 96
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2598,7 +2602,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 97
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2614,7 +2618,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 98
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2634,7 +2638,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 99
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2650,7 +2654,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 100
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2659,7 +2663,7 @@ namespace mg5amcCpu
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2668,7 +2672,7 @@ namespace mg5amcCpu
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2684,7 +2688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 101
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2700,7 +2704,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 102
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2716,7 +2720,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 103
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2733,10 +2737,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 104 OF 1240 ***
 
       // Wavefunction(s) for diagram number 104
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] );
+      helas_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] );
 
       // Amplitude(s) for diagram number 104
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2749,10 +2753,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 105 OF 1240 ***
 
       // Wavefunction(s) for diagram number 105
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] );
+      helas_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] );
 
       // Amplitude(s) for diagram number 105
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2769,10 +2773,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 106 OF 1240 ***
 
       // Wavefunction(s) for diagram number 106
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
 
       // Amplitude(s) for diagram number 106
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2788,7 +2792,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 107
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 107 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2808,7 +2812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 108
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2828,7 +2832,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 109
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2845,10 +2849,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 110 OF 1240 ***
 
       // Wavefunction(s) for diagram number 110
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 110
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2861,10 +2865,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 111 OF 1240 ***
 
       // Wavefunction(s) for diagram number 111
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 111
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2881,10 +2885,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 112 OF 1240 ***
 
       // Wavefunction(s) for diagram number 112
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 112
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2900,7 +2904,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 113
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2920,7 +2924,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 114
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 114 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2940,7 +2944,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 115
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 115 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2957,10 +2961,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 116 OF 1240 ***
 
       // Wavefunction(s) for diagram number 116
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 116
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 116 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2973,10 +2977,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 117 OF 1240 ***
 
       // Wavefunction(s) for diagram number 117
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] );
+      helas_VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] );
 
       // Amplitude(s) for diagram number 117
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 117 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2993,10 +2997,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 118 OF 1240 ***
 
       // Wavefunction(s) for diagram number 118
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] );
+      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] );
 
       // Amplitude(s) for diagram number 118
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 118 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3012,7 +3016,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 119
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 119 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3032,7 +3036,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 120
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 120 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3052,7 +3056,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 121
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 121 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3072,7 +3076,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 122
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -3081,7 +3085,7 @@ namespace mg5amcCpu
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -3090,7 +3094,7 @@ namespace mg5amcCpu
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3106,7 +3110,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 123
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
@@ -3115,7 +3119,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3124,7 +3128,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3137,13 +3141,13 @@ namespace mg5amcCpu
       // *** DIAGRAM 124 OF 1240 ***
 
       // Wavefunction(s) for diagram number 124
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 124
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 124 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3153,10 +3157,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 125 OF 1240 ***
 
       // Wavefunction(s) for diagram number 125
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 125
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 125 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3166,11 +3170,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 126 OF 1240 ***
 
       // Wavefunction(s) for diagram number 126
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] );
+      helas_FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] );
+      helas_FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] );
 
       // Amplitude(s) for diagram number 126
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 126 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3183,7 +3187,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 127
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 127 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3193,10 +3197,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 128 OF 1240 ***
 
       // Wavefunction(s) for diagram number 128
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] );
+      helas_FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] );
 
       // Amplitude(s) for diagram number 128
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 128 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3209,7 +3213,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 129
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 129 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3219,10 +3223,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 130 OF 1240 ***
 
       // Wavefunction(s) for diagram number 130
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] );
+      helas_FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] );
 
       // Amplitude(s) for diagram number 130
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 130 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3235,10 +3239,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 131 OF 1240 ***
 
       // Wavefunction(s) for diagram number 131
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
 
       // Amplitude(s) for diagram number 131
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 131 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3252,7 +3256,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 132
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 132 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3266,7 +3270,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 133
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 133 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3279,10 +3283,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 134 OF 1240 ***
 
       // Wavefunction(s) for diagram number 134
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 134
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 134 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3296,7 +3300,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 135
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 135 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3310,7 +3314,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 136
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 136 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3326,7 +3330,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 137
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 137 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3337,10 +3341,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 138 OF 1240 ***
 
       // Wavefunction(s) for diagram number 138
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
+      helas_FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
 
       // Amplitude(s) for diagram number 138
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 138 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3354,17 +3358,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 139
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -3373,12 +3377,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 140 OF 1240 ***
 
       // Wavefunction(s) for diagram number 140
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] );
+      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] );
+      helas_FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] );
+      helas_VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] );
 
       // Amplitude(s) for diagram number 140
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 140 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3395,10 +3399,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 141 OF 1240 ***
 
       // Wavefunction(s) for diagram number 141
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] );
+      helas_VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] );
 
       // Amplitude(s) for diagram number 141
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 141 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3418,7 +3422,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 142
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -3427,7 +3431,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -3436,7 +3440,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3449,10 +3453,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 143 OF 1240 ***
 
       // Wavefunction(s) for diagram number 143
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] );
+      helas_FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] );
 
       // Amplitude(s) for diagram number 143
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 143 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3466,7 +3470,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 144
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 144 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3482,7 +3486,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 145
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 145 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3496,7 +3500,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 146
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 146 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3509,10 +3513,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 147 OF 1240 ***
 
       // Wavefunction(s) for diagram number 147
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
+      helas_FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
 
       // Amplitude(s) for diagram number 147
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 147 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3523,10 +3527,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 148 OF 1240 ***
 
       // Wavefunction(s) for diagram number 148
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] );
+      helas_FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] );
 
       // Amplitude(s) for diagram number 148
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 148 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3542,7 +3546,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 149
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 149 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3556,7 +3560,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 150
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 150 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3567,10 +3571,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 151 OF 1240 ***
 
       // Wavefunction(s) for diagram number 151
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] );
+      helas_FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 151
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 151 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3586,7 +3590,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 152
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 152 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3600,7 +3604,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 153
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 153 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3616,7 +3620,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 154
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 154 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3636,7 +3640,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 155
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 155 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3649,11 +3653,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 156 OF 1240 ***
 
       // Wavefunction(s) for diagram number 156
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] );
+      helas_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] );
+      helas_VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] );
 
       // Amplitude(s) for diagram number 156
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 156 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3670,10 +3674,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 157 OF 1240 ***
 
       // Wavefunction(s) for diagram number 157
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] );
+      helas_VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] );
 
       // Amplitude(s) for diagram number 157
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 157 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3693,7 +3697,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 158
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3702,7 +3706,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3711,7 +3715,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3724,10 +3728,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 159 OF 1240 ***
 
       // Wavefunction(s) for diagram number 159
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
 
       // Amplitude(s) for diagram number 159
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 159 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3741,7 +3745,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 160
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 160 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3757,7 +3761,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 161
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 161 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3771,7 +3775,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 162
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 162 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3784,10 +3788,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 163 OF 1240 ***
 
       // Wavefunction(s) for diagram number 163
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
+      helas_FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
 
       // Amplitude(s) for diagram number 163
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 163 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3798,10 +3802,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 164 OF 1240 ***
 
       // Wavefunction(s) for diagram number 164
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] );
+      helas_FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] );
 
       // Amplitude(s) for diagram number 164
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 164 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3817,7 +3821,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 165
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 165 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3831,7 +3835,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 166
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 166 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3845,7 +3849,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 167
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 167 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3861,7 +3865,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 168
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 168 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3875,7 +3879,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 169
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 169 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3891,7 +3895,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 170
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 170 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3911,7 +3915,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 171
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 171 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3924,11 +3928,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 172 OF 1240 ***
 
       // Wavefunction(s) for diagram number 172
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] );
+      helas_VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] );
 
       // Amplitude(s) for diagram number 172
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 172 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3945,10 +3949,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 173 OF 1240 ***
 
       // Wavefunction(s) for diagram number 173
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] );
+      helas_VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] );
 
       // Amplitude(s) for diagram number 173
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 173 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3968,7 +3972,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 174
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3977,7 +3981,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3986,7 +3990,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3999,10 +4003,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 175 OF 1240 ***
 
       // Wavefunction(s) for diagram number 175
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] );
+      helas_FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] );
 
       // Amplitude(s) for diagram number 175
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 175 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4016,7 +4020,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 176
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 176 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4032,7 +4036,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 177
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 177 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4046,7 +4050,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 178
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 178 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4059,10 +4063,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 179 OF 1240 ***
 
       // Wavefunction(s) for diagram number 179
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
 
       // Amplitude(s) for diagram number 179
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 179 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4076,7 +4080,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 180
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 180 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4092,7 +4096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 181
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 181 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4106,7 +4110,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 182
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 182 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4120,7 +4124,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 183
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 183 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4136,7 +4140,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 184
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 184 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4150,7 +4154,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 185
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 185 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4166,7 +4170,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 186
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 186 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4186,7 +4190,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 187
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 187 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4199,10 +4203,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 188 OF 1240 ***
 
       // Wavefunction(s) for diagram number 188
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
 
       // Amplitude(s) for diagram number 188
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 188 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4215,7 +4219,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 189
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 189 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4225,10 +4229,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 190 OF 1240 ***
 
       // Wavefunction(s) for diagram number 190
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] );
+      helas_FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] );
 
       // Amplitude(s) for diagram number 190
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 190 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4241,7 +4245,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 191
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 191 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4254,7 +4258,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 192
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 192 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4267,7 +4271,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 193
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 193 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4280,7 +4284,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 194
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 194 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4294,7 +4298,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 195
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 195 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4310,7 +4314,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 196
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 196 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4324,7 +4328,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 197
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 197 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4337,7 +4341,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 198
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 198 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4347,10 +4351,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 199 OF 1240 ***
 
       // Wavefunction(s) for diagram number 199
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
+      helas_FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
 
       // Amplitude(s) for diagram number 199
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 199 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4363,7 +4367,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 200
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 200 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4376,7 +4380,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 201
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 201 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4389,7 +4393,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 202
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 202 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4402,7 +4406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 203
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 203 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4416,7 +4420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 204
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 204 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4432,7 +4436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 205
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 205 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4446,7 +4450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 206
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 206 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4459,7 +4463,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 207
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 207 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4469,10 +4473,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 208 OF 1240 ***
 
       // Wavefunction(s) for diagram number 208
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 208
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 208 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4485,7 +4489,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 209
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 209 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4498,7 +4502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 210
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 210 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4511,7 +4515,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 211
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 211 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4524,7 +4528,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 212
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 212 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4538,7 +4542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 213
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 213 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4554,7 +4558,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 214
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 214 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4568,7 +4572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 215
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 215 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4582,7 +4586,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 216
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 216 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4595,10 +4599,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 217 OF 1240 ***
 
       // Wavefunction(s) for diagram number 217
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] );
 
       // Amplitude(s) for diagram number 217
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 217 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4618,7 +4622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 218
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 218 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4638,7 +4642,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 219
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4647,7 +4651,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4656,7 +4660,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -4672,7 +4676,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 220
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 220 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4688,7 +4692,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 221
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 221 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4702,7 +4706,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 222
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 222 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4716,7 +4720,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 223
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 223 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4729,10 +4733,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 224 OF 1240 ***
 
       // Wavefunction(s) for diagram number 224
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 224
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 224 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4752,7 +4756,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 225
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 225 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4772,7 +4776,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 226
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4781,7 +4785,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4790,7 +4794,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4806,7 +4810,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 227
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 227 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4822,7 +4826,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 228
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 228 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4836,7 +4840,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 229
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 229 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4850,7 +4854,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 230
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 230 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4863,10 +4867,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 231 OF 1240 ***
 
       // Wavefunction(s) for diagram number 231
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] );
+      helas_VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] );
 
       // Amplitude(s) for diagram number 231
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 231 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4886,7 +4890,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 232
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 232 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4906,7 +4910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 233
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4915,7 +4919,7 @@ namespace mg5amcCpu
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4924,7 +4928,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -4940,7 +4944,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 234
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 234 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4956,7 +4960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 235
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 235 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4967,12 +4971,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 236 OF 1240 ***
 
       // Wavefunction(s) for diagram number 236
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] );
+      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] );
+      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] );
+      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] );
 
       // Amplitude(s) for diagram number 236
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4981,7 +4985,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4990,7 +4994,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5006,17 +5010,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 237
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -5028,17 +5032,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 238
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -5047,12 +5051,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 239 OF 1240 ***
 
       // Wavefunction(s) for diagram number 239
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] );
+      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] );
+      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] );
+      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] );
 
       // Amplitude(s) for diagram number 239
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5061,7 +5065,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5070,7 +5074,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5086,17 +5090,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 240
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -5108,17 +5112,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 241
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -5127,12 +5131,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 242 OF 1240 ***
 
       // Wavefunction(s) for diagram number 242
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] );
+      helas_VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] );
+      helas_VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] );
+      helas_VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] );
 
       // Amplitude(s) for diagram number 242
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5141,7 +5145,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5150,7 +5154,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5166,17 +5170,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 243
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -5188,17 +5192,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 244
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -5210,17 +5214,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 245
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -5232,7 +5236,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 246
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5241,7 +5245,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5250,7 +5254,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5263,13 +5267,13 @@ namespace mg5amcCpu
       // *** DIAGRAM 247 OF 1240 ***
 
       // Wavefunction(s) for diagram number 247
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 247
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 247 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5279,10 +5283,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 248 OF 1240 ***
 
       // Wavefunction(s) for diagram number 248
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] );
+      helas_FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] );
 
       // Amplitude(s) for diagram number 248
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 248 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5292,11 +5296,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 249 OF 1240 ***
 
       // Wavefunction(s) for diagram number 249
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] );
+      helas_FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
+      helas_FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] );
 
       // Amplitude(s) for diagram number 249
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 249 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5309,7 +5313,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 250
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 250 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5319,10 +5323,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 251 OF 1240 ***
 
       // Wavefunction(s) for diagram number 251
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
+      helas_FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
 
       // Amplitude(s) for diagram number 251
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 251 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5335,7 +5339,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 252
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 252 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5345,10 +5349,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 253 OF 1240 ***
 
       // Wavefunction(s) for diagram number 253
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] );
+      helas_FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] );
 
       // Amplitude(s) for diagram number 253
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 253 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5361,10 +5365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 254 OF 1240 ***
 
       // Wavefunction(s) for diagram number 254
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 254
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 254 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5378,7 +5382,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 255
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 255 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5392,7 +5396,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 256
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 256 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5405,10 +5409,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 257 OF 1240 ***
 
       // Wavefunction(s) for diagram number 257
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
+      helas_FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
 
       // Amplitude(s) for diagram number 257
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 257 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5422,7 +5426,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 258
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 258 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5436,7 +5440,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 259
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 259 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5452,7 +5456,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 260
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 260 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5463,10 +5467,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 261 OF 1240 ***
 
       // Wavefunction(s) for diagram number 261
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
+      helas_FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
 
       // Amplitude(s) for diagram number 261
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 261 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5480,17 +5484,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 262
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[33] += amp_sv[0];
       jamp_sv[35] -= amp_sv[0];
       jamp_sv[41] -= amp_sv[0];
       jamp_sv[47] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[35] -= amp_sv[0];
       jamp_sv[39] += amp_sv[0];
       jamp_sv[41] -= amp_sv[0];
       jamp_sv[45] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[39] += amp_sv[0];
       jamp_sv[45] += amp_sv[0];
@@ -5499,10 +5503,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 263 OF 1240 ***
 
       // Wavefunction(s) for diagram number 263
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] );
+      helas_FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] );
 
       // Amplitude(s) for diagram number 263
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 263 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5522,7 +5526,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 264
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 264 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5542,7 +5546,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 265
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5551,7 +5555,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5560,7 +5564,7 @@ namespace mg5amcCpu
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5573,10 +5577,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 266 OF 1240 ***
 
       // Wavefunction(s) for diagram number 266
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] );
+      helas_FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] );
 
       // Amplitude(s) for diagram number 266
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 266 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5590,7 +5594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 267
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 267 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5606,7 +5610,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 268
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 268 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5620,7 +5624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 269
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 269 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5633,10 +5637,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 270 OF 1240 ***
 
       // Wavefunction(s) for diagram number 270
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
+      helas_FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
 
       // Amplitude(s) for diagram number 270
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 270 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5647,10 +5651,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 271 OF 1240 ***
 
       // Wavefunction(s) for diagram number 271
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] );
+      helas_FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] );
 
       // Amplitude(s) for diagram number 271
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 271 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5666,7 +5670,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 272
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 272 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5680,7 +5684,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 273
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 273 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5691,10 +5695,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 274 OF 1240 ***
 
       // Wavefunction(s) for diagram number 274
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] );
+      helas_FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 274
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 274 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5710,7 +5714,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 275
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 275 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5724,7 +5728,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 276
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 276 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5740,7 +5744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 277
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 277 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5760,7 +5764,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 278
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 278 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5776,7 +5780,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 279
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 279 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5796,7 +5800,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 280
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 280 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5816,7 +5820,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 281
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5825,7 +5829,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5834,7 +5838,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5847,10 +5851,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 282 OF 1240 ***
 
       // Wavefunction(s) for diagram number 282
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
+      helas_FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
 
       // Amplitude(s) for diagram number 282
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 282 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5864,7 +5868,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 283
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 283 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5880,7 +5884,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 284
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 284 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5894,7 +5898,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 285
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 285 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5907,10 +5911,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 286 OF 1240 ***
 
       // Wavefunction(s) for diagram number 286
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
+      helas_FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
 
       // Amplitude(s) for diagram number 286
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 286 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5921,10 +5925,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 287 OF 1240 ***
 
       // Wavefunction(s) for diagram number 287
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] );
+      helas_FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] );
 
       // Amplitude(s) for diagram number 287
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 287 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5940,7 +5944,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 288
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 288 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5954,7 +5958,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 289
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 289 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5968,7 +5972,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 290
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 290 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5984,7 +5988,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 291
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 291 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5998,7 +6002,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 292
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 292 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6014,7 +6018,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 293
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 293 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6034,7 +6038,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 294
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 294 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6050,7 +6054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 295
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 295 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6070,7 +6074,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 296
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 296 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6090,7 +6094,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 297
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -6099,7 +6103,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -6108,7 +6112,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -6121,10 +6125,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 298 OF 1240 ***
 
       // Wavefunction(s) for diagram number 298
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
+      helas_FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
 
       // Amplitude(s) for diagram number 298
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 298 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6138,7 +6142,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 299
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 299 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6154,7 +6158,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 300
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 300 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6168,7 +6172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 301
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 301 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6181,10 +6185,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 302 OF 1240 ***
 
       // Wavefunction(s) for diagram number 302
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 302
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 302 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6198,7 +6202,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 303
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 303 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6214,7 +6218,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 304
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 304 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6228,7 +6232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 305
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 305 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6242,7 +6246,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 306
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 306 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6258,7 +6262,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 307
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 307 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6272,7 +6276,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 308
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 308 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6288,7 +6292,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 309
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 309 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6308,7 +6312,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 310
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 310 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6321,10 +6325,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 311 OF 1240 ***
 
       // Wavefunction(s) for diagram number 311
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 311
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 311 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6337,7 +6341,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 312
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 312 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6347,10 +6351,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 313 OF 1240 ***
 
       // Wavefunction(s) for diagram number 313
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] );
+      helas_FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] );
 
       // Amplitude(s) for diagram number 313
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 313 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6363,7 +6367,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 314
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 314 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6376,7 +6380,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 315
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 315 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6389,7 +6393,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 316
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 316 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6402,7 +6406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 317
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 317 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6416,7 +6420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 318
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 318 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6432,7 +6436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 319
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 319 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6446,7 +6450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 320
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 320 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6459,7 +6463,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 321
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 321 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6469,10 +6473,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 322 OF 1240 ***
 
       // Wavefunction(s) for diagram number 322
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
+      helas_FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
 
       // Amplitude(s) for diagram number 322
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 322 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6485,7 +6489,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 323
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 323 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6498,7 +6502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 324
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 324 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6511,7 +6515,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 325
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 325 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6524,7 +6528,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 326
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 326 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6538,7 +6542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 327
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 327 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6554,7 +6558,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 328
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 328 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6568,7 +6572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 329
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 329 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6581,7 +6585,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 330
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 330 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6591,10 +6595,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 331 OF 1240 ***
 
       // Wavefunction(s) for diagram number 331
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
+      helas_FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
 
       // Amplitude(s) for diagram number 331
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 331 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6607,7 +6611,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 332
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 332 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6620,7 +6624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 333
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 333 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6633,7 +6637,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 334
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 334 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6646,7 +6650,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 335
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 335 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6660,7 +6664,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 336
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 336 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6676,7 +6680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 337
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 337 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6690,7 +6694,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 338
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 338 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6704,7 +6708,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 339
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 339 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6720,7 +6724,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 340
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 340 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6740,7 +6744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 341
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 341 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6760,7 +6764,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 342
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -6769,7 +6773,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -6778,7 +6782,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -6794,7 +6798,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 343
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 343 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6810,7 +6814,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 344
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 344 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6824,7 +6828,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 345
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 345 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6838,7 +6842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 346
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 346 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6854,7 +6858,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 347
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 347 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6874,7 +6878,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 348
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 348 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6894,7 +6898,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 349
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -6903,7 +6907,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -6912,7 +6916,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
@@ -6928,7 +6932,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 350
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 350 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6944,7 +6948,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 351
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 351 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6958,7 +6962,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 352
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 352 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6972,7 +6976,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 353
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 353 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6988,7 +6992,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 354
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 354 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7008,7 +7012,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 355
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 355 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7028,7 +7032,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 356
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7037,7 +7041,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7046,7 +7050,7 @@ namespace mg5amcCpu
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7062,7 +7066,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 357
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 357 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7078,7 +7082,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 358
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 358 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7092,7 +7096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 359
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7101,7 +7105,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7110,7 +7114,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7126,17 +7130,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 360
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[33] += amp_sv[0];
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[87] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[57] += amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[81] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[57] += amp_sv[0];
       jamp_sv[81] += amp_sv[0];
@@ -7148,17 +7152,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 361
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
@@ -7170,7 +7174,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 362
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7179,7 +7183,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7188,7 +7192,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7204,17 +7208,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 363
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[35] += amp_sv[0];
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[59] += amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[35] -= amp_sv[0];
       jamp_sv[59] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
@@ -7226,17 +7230,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 364
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[81] += amp_sv[0];
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[87] += amp_sv[0];
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[81] -= amp_sv[0];
       jamp_sv[87] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
@@ -7248,7 +7252,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 365
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7257,7 +7261,7 @@ namespace mg5amcCpu
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7266,7 +7270,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7282,17 +7286,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 366
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[41] += amp_sv[0];
       jamp_sv[47] -= amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[47] -= amp_sv[0];
       jamp_sv[83] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[41] -= amp_sv[0];
       jamp_sv[83] += amp_sv[0];
       jamp_sv[107] += amp_sv[0];
@@ -7304,17 +7308,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 367
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[57] += amp_sv[0];
       jamp_sv[59] -= amp_sv[0];
       jamp_sv[65] -= amp_sv[0];
       jamp_sv[71] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[59] -= amp_sv[0];
       jamp_sv[63] += amp_sv[0];
       jamp_sv[65] -= amp_sv[0];
       jamp_sv[69] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[57] -= amp_sv[0];
       jamp_sv[63] += amp_sv[0];
       jamp_sv[69] += amp_sv[0];
@@ -7326,17 +7330,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 368
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[65] += amp_sv[0];
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[65] -= amp_sv[0];
       jamp_sv[89] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
@@ -7348,7 +7352,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 369
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7357,7 +7361,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7366,7 +7370,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7379,11 +7383,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 370 OF 1240 ***
 
       // Wavefunction(s) for diagram number 370
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 370
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 370 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7397,7 +7401,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 371
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 371 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7408,11 +7412,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 372 OF 1240 ***
 
       // Wavefunction(s) for diagram number 372
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] );
+      helas_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] );
 
       // Amplitude(s) for diagram number 372
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 372 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7432,7 +7436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 373
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 373 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7445,10 +7449,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 374 OF 1240 ***
 
       // Wavefunction(s) for diagram number 374
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 374
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 374 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7468,7 +7472,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 375
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 375 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7481,12 +7485,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 376 OF 1240 ***
 
       // Wavefunction(s) for diagram number 376
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 376
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7495,7 +7499,7 @@ namespace mg5amcCpu
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7504,7 +7508,7 @@ namespace mg5amcCpu
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7517,10 +7521,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 377 OF 1240 ***
 
       // Wavefunction(s) for diagram number 377
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] );
+      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] );
 
       // Amplitude(s) for diagram number 377
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 377 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7531,10 +7535,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 378 OF 1240 ***
 
       // Wavefunction(s) for diagram number 378
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 378
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 378 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7548,7 +7552,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 379
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 379 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7564,7 +7568,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 380
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 380 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7575,10 +7579,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 381 OF 1240 ***
 
       // Wavefunction(s) for diagram number 381
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] );
+      helas_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] );
 
       // Amplitude(s) for diagram number 381
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 381 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7592,7 +7596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 382
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 382 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7608,7 +7612,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 383
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 383 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7624,7 +7628,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 384
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 384 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7637,10 +7641,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 385 OF 1240 ***
 
       // Wavefunction(s) for diagram number 385
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] );
+      helas_VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] );
 
       // Amplitude(s) for diagram number 385
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 385 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7657,10 +7661,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 386 OF 1240 ***
 
       // Wavefunction(s) for diagram number 386
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 386
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 386 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7674,7 +7678,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 387
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 387 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7685,10 +7689,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 388 OF 1240 ***
 
       // Wavefunction(s) for diagram number 388
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] );
+      helas_FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] );
 
       // Amplitude(s) for diagram number 388
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 388 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7708,7 +7712,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 389
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 389 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7724,7 +7728,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 390
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 390 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7744,7 +7748,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 391
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 391 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7760,7 +7764,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 392
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7769,7 +7773,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7778,7 +7782,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7791,10 +7795,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 393 OF 1240 ***
 
       // Wavefunction(s) for diagram number 393
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 393
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 393 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7805,10 +7809,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 394 OF 1240 ***
 
       // Wavefunction(s) for diagram number 394
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] );
+      helas_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] );
 
       // Amplitude(s) for diagram number 394
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 394 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7822,7 +7826,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 395
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 395 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7838,7 +7842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 396
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 396 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7849,10 +7853,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 397 OF 1240 ***
 
       // Wavefunction(s) for diagram number 397
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 397
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 397 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7866,7 +7870,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 398
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 398 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7882,7 +7886,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 399
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 399 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7898,7 +7902,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 400
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 400 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7914,7 +7918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 401
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 401 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7934,7 +7938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 402
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 402 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7950,7 +7954,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 403
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 403 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7970,7 +7974,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 404
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 404 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7986,7 +7990,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 405
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 405 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8006,7 +8010,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 406
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 406 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8026,7 +8030,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 407
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 407 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8046,7 +8050,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 408
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -8063,7 +8067,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -8080,7 +8084,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -8101,10 +8105,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 409 OF 1240 ***
 
       // Wavefunction(s) for diagram number 409
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 409
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 409 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8129,10 +8133,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 410 OF 1240 ***
 
       // Wavefunction(s) for diagram number 410
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] );
+      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 410
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 410 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8160,7 +8164,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 411
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 411 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8188,7 +8192,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 412
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 412 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8208,7 +8212,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 413
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 413 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8224,7 +8228,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 414
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 414 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8240,7 +8244,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 415
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 415 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8260,7 +8264,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 416
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 416 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8276,7 +8280,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 417
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 417 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8292,7 +8296,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 418
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 418 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8308,7 +8312,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 419
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 419 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8328,7 +8332,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 420
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 420 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8344,7 +8348,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 421
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 421 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8364,7 +8368,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 422
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 422 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8384,7 +8388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 423
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 423 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8404,7 +8408,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 424
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -8421,7 +8425,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[7] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -8438,7 +8442,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -8459,10 +8463,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 425 OF 1240 ***
 
       // Wavefunction(s) for diagram number 425
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 425
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 425 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8490,7 +8494,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 426
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 426 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8518,7 +8522,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 427
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 427 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8546,7 +8550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 428
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 428 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8566,7 +8570,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 429
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 429 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8582,7 +8586,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 430
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 430 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8598,7 +8602,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 431
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 431 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8618,7 +8622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 432
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 432 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8634,7 +8638,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 433
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 433 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8647,10 +8651,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 434 OF 1240 ***
 
       // Wavefunction(s) for diagram number 434
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 434
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 434 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8678,7 +8682,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 435
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 435 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8706,7 +8710,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 436
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -8723,7 +8727,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -8740,7 +8744,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -8761,10 +8765,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 437 OF 1240 ***
 
       // Wavefunction(s) for diagram number 437
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] );
+      helas_VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] );
 
       // Amplitude(s) for diagram number 437
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 437 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8792,7 +8796,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 438
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 438 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8820,7 +8824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 439
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -8837,7 +8841,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[115] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -8854,7 +8858,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -8878,7 +8882,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 440
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 440 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8906,7 +8910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 441
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 441 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8934,7 +8938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 442
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -8951,7 +8955,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -8968,7 +8972,7 @@ namespace mg5amcCpu
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -8989,12 +8993,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 443 OF 1240 ***
 
       // Wavefunction(s) for diagram number 443
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 443
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -9011,7 +9015,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -9028,7 +9032,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -9049,12 +9053,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 444 OF 1240 ***
 
       // Wavefunction(s) for diagram number 444
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] );
 
       // Amplitude(s) for diagram number 444
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -9071,7 +9075,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -9088,7 +9092,7 @@ namespace mg5amcCpu
       jamp_sv[94] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -9112,7 +9116,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 445
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -9129,7 +9133,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -9146,7 +9150,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -9170,7 +9174,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 446
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -9187,7 +9191,7 @@ namespace mg5amcCpu
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
@@ -9204,7 +9208,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -9228,7 +9232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 447
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 447 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9256,7 +9260,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 448
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 448 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9284,7 +9288,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 449
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 449 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9312,7 +9316,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 450
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 450 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9332,7 +9336,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 451
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 451 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9348,7 +9352,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 452
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 452 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9362,7 +9366,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 453
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 453 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9376,7 +9380,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 454
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 454 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9392,7 +9396,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 455
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 455 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9412,7 +9416,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 456
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9421,7 +9425,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9430,7 +9434,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9446,7 +9450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 457
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 457 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9462,7 +9466,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 458
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 458 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9476,7 +9480,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 459
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 459 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9490,7 +9494,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 460
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 460 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9510,7 +9514,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 461
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 461 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9526,7 +9530,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 462
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 462 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9540,7 +9544,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 463
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 463 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9554,7 +9558,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 464
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 464 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9570,7 +9574,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 465
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 465 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9590,7 +9594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 466
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9599,7 +9603,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9608,7 +9612,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9624,7 +9628,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 467
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 467 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9640,7 +9644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 468
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 468 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9654,7 +9658,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 469
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 469 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9668,7 +9672,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 470
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 470 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9688,7 +9692,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 471
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 471 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9704,7 +9708,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 472
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 472 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9718,7 +9722,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 473
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 473 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9732,7 +9736,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 474
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 474 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9748,7 +9752,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 475
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 475 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9768,7 +9772,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 476
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9777,7 +9781,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9786,7 +9790,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9802,7 +9806,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 477
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 477 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9822,7 +9826,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 478
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 478 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9838,7 +9842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 479
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 479 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9852,7 +9856,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 480
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 480 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9866,7 +9870,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 481
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 481 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9882,7 +9886,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 482
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 482 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9902,7 +9906,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 483
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9911,7 +9915,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9920,7 +9924,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9936,7 +9940,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 484
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 484 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9956,7 +9960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 485
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 485 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9976,7 +9980,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 486
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 486 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9996,7 +10000,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 487
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 487 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10012,7 +10016,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 488
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 488 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10032,7 +10036,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 489
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 489 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10048,7 +10052,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 490
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10057,7 +10061,7 @@ namespace mg5amcCpu
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10066,7 +10070,7 @@ namespace mg5amcCpu
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -10082,7 +10086,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 491
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10091,7 +10095,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -10100,7 +10104,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -10116,7 +10120,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 492
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -10133,7 +10137,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -10150,7 +10154,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -10171,11 +10175,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 493 OF 1240 ***
 
       // Wavefunction(s) for diagram number 493
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 493
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 493 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10189,7 +10193,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 494
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 494 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10200,10 +10204,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 495 OF 1240 ***
 
       // Wavefunction(s) for diagram number 495
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] );
+      helas_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] );
 
       // Amplitude(s) for diagram number 495
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 495 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10223,7 +10227,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 496
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 496 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10236,10 +10240,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 497 OF 1240 ***
 
       // Wavefunction(s) for diagram number 497
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 497
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 497 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10259,7 +10263,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 498
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 498 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10272,12 +10276,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 499 OF 1240 ***
 
       // Wavefunction(s) for diagram number 499
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] );
 
       // Amplitude(s) for diagram number 499
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10286,7 +10290,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10295,7 +10299,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10308,10 +10312,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 500 OF 1240 ***
 
       // Wavefunction(s) for diagram number 500
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
 
       // Amplitude(s) for diagram number 500
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 500 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10322,10 +10326,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 501 OF 1240 ***
 
       // Wavefunction(s) for diagram number 501
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 501
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 501 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10339,7 +10343,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 502
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 502 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10355,7 +10359,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 503
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 503 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10366,10 +10370,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 504 OF 1240 ***
 
       // Wavefunction(s) for diagram number 504
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
+      helas_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
 
       // Amplitude(s) for diagram number 504
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 504 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10383,7 +10387,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 505
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 505 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10399,7 +10403,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 506
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 506 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10415,7 +10419,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 507
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 507 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10428,10 +10432,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 508 OF 1240 ***
 
       // Wavefunction(s) for diagram number 508
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] );
 
       // Amplitude(s) for diagram number 508
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 508 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10448,10 +10452,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 509 OF 1240 ***
 
       // Wavefunction(s) for diagram number 509
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] );
+      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] );
 
       // Amplitude(s) for diagram number 509
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 509 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10465,7 +10469,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 510
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 510 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10479,7 +10483,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 511
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 511 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10499,7 +10503,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 512
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 512 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10515,7 +10519,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 513
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 513 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10535,7 +10539,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 514
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 514 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10551,7 +10555,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 515
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10560,7 +10564,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10569,7 +10573,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10582,10 +10586,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 516 OF 1240 ***
 
       // Wavefunction(s) for diagram number 516
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
+      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
 
       // Amplitude(s) for diagram number 516
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 516 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10596,10 +10600,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 517 OF 1240 ***
 
       // Wavefunction(s) for diagram number 517
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 517
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 517 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10613,7 +10617,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 518
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 518 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10629,7 +10633,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 519
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 519 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10640,10 +10644,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 520 OF 1240 ***
 
       // Wavefunction(s) for diagram number 520
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 520
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 520 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10657,7 +10661,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 521
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 521 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10673,7 +10677,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 522
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 522 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10689,7 +10693,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 523
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 523 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10705,7 +10709,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 524
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 524 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10725,7 +10729,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 525
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 525 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10741,7 +10745,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 526
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 526 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10761,7 +10765,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 527
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 527 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10777,7 +10781,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 528
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 528 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10797,7 +10801,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 529
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 529 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10817,7 +10821,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 530
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 530 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10837,7 +10841,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 531
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -10854,7 +10858,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -10871,7 +10875,7 @@ namespace mg5amcCpu
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -10892,10 +10896,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 532 OF 1240 ***
 
       // Wavefunction(s) for diagram number 532
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 532
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 532 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10920,10 +10924,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 533 OF 1240 ***
 
       // Wavefunction(s) for diagram number 533
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] );
+      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 533
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 533 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10951,7 +10955,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 534
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 534 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10979,7 +10983,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 535
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 535 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10999,7 +11003,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 536
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 536 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11015,7 +11019,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 537
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 537 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11031,7 +11035,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 538
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 538 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11051,7 +11055,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 539
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 539 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11067,7 +11071,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 540
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 540 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11083,7 +11087,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 541
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 541 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11099,7 +11103,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 542
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 542 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11119,7 +11123,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 543
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 543 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11135,7 +11139,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 544
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 544 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11155,7 +11159,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 545
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 545 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11175,7 +11179,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 546
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 546 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11195,7 +11199,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 547
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -11212,7 +11216,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[13] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -11229,7 +11233,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -11250,10 +11254,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 548 OF 1240 ***
 
       // Wavefunction(s) for diagram number 548
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 548
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 548 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11281,7 +11285,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 549
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 549 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11309,7 +11313,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 550
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 550 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11337,7 +11341,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 551
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 551 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11357,7 +11361,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 552
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 552 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11373,7 +11377,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 553
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 553 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11389,7 +11393,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 554
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 554 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11409,7 +11413,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 555
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 555 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11425,7 +11429,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 556
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 556 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11438,10 +11442,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 557 OF 1240 ***
 
       // Wavefunction(s) for diagram number 557
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 557
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 557 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11469,7 +11473,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 558
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 558 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11497,7 +11501,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 559
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -11514,7 +11518,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -11531,7 +11535,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -11555,7 +11559,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 560
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 560 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11583,7 +11587,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 561
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 561 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11611,7 +11615,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 562
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[15] += amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -11628,7 +11632,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -11645,7 +11649,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -11669,7 +11673,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 563
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 563 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11697,7 +11701,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 564
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 564 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11725,7 +11729,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 565
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -11742,7 +11746,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -11759,7 +11763,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -11780,12 +11784,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 566 OF 1240 ***
 
       // Wavefunction(s) for diagram number 566
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 566
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -11802,7 +11806,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -11819,7 +11823,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -11840,12 +11844,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 567 OF 1240 ***
 
       // Wavefunction(s) for diagram number 567
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
 
       // Amplitude(s) for diagram number 567
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -11862,7 +11866,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -11879,7 +11883,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -11903,7 +11907,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 568
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -11920,7 +11924,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -11937,7 +11941,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[15] += amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -11961,7 +11965,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 569
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
@@ -11978,7 +11982,7 @@ namespace mg5amcCpu
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
@@ -11995,7 +11999,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -12019,7 +12023,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 570
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 570 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12047,7 +12051,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 571
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 571 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12075,7 +12079,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 572
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 572 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12103,7 +12107,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 573
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 573 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12123,7 +12127,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 574
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 574 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12139,7 +12143,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 575
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 575 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12153,7 +12157,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 576
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 576 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12167,7 +12171,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 577
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 577 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12183,7 +12187,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 578
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 578 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12203,7 +12207,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 579
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12212,7 +12216,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12221,7 +12225,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12237,7 +12241,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 580
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 580 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12253,7 +12257,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 581
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 581 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12267,7 +12271,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 582
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 582 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12281,7 +12285,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 583
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 583 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12301,7 +12305,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 584
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 584 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12317,7 +12321,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 585
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 585 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12331,7 +12335,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 586
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 586 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12345,7 +12349,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 587
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 587 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12361,7 +12365,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 588
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 588 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12381,7 +12385,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 589
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12390,7 +12394,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12399,7 +12403,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12415,7 +12419,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 590
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 590 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12431,7 +12435,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 591
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 591 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12445,7 +12449,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 592
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 592 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12459,7 +12463,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 593
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 593 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12479,7 +12483,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 594
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 594 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12495,7 +12499,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 595
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 595 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12509,7 +12513,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 596
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 596 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12523,7 +12527,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 597
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 597 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12539,7 +12543,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 598
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 598 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12559,7 +12563,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 599
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12568,7 +12572,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12577,7 +12581,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12593,7 +12597,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 600
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 600 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12613,7 +12617,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 601
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 601 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12629,7 +12633,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 602
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 602 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12643,7 +12647,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 603
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 603 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12657,7 +12661,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 604
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 604 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12673,7 +12677,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 605
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 605 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12693,7 +12697,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 606
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12702,7 +12706,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12711,7 +12715,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12727,7 +12731,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 607
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 607 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12747,7 +12751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 608
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 608 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12767,7 +12771,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 609
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 609 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12787,7 +12791,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 610
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 610 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12803,7 +12807,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 611
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 611 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12823,7 +12827,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 612
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 612 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12839,7 +12843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 613
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12848,7 +12852,7 @@ namespace mg5amcCpu
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12857,7 +12861,7 @@ namespace mg5amcCpu
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -12873,7 +12877,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 614
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12882,7 +12886,7 @@ namespace mg5amcCpu
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -12891,7 +12895,7 @@ namespace mg5amcCpu
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -12907,7 +12911,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 615
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -12924,7 +12928,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -12941,7 +12945,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -12962,11 +12966,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 616 OF 1240 ***
 
       // Wavefunction(s) for diagram number 616
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 616
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 616 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12980,7 +12984,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 617
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 617 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12991,10 +12995,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 618 OF 1240 ***
 
       // Wavefunction(s) for diagram number 618
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] );
+      helas_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] );
 
       // Amplitude(s) for diagram number 618
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 618 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13014,7 +13018,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 619
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 619 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13027,10 +13031,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 620 OF 1240 ***
 
       // Wavefunction(s) for diagram number 620
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 620
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 620 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13050,7 +13054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 621
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 621 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13063,12 +13067,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 622 OF 1240 ***
 
       // Wavefunction(s) for diagram number 622
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] );
 
       // Amplitude(s) for diagram number 622
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13077,7 +13081,7 @@ namespace mg5amcCpu
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13086,7 +13090,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13099,10 +13103,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 623 OF 1240 ***
 
       // Wavefunction(s) for diagram number 623
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 623
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 623 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13113,10 +13117,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 624 OF 1240 ***
 
       // Wavefunction(s) for diagram number 624
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
+      helas_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
 
       // Amplitude(s) for diagram number 624
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 624 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13130,7 +13134,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 625
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 625 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13146,7 +13150,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 626
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 626 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13157,10 +13161,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 627 OF 1240 ***
 
       // Wavefunction(s) for diagram number 627
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 627
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 627 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13174,7 +13178,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 628
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 628 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13190,7 +13194,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 629
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 629 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13206,7 +13210,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 630
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 630 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13219,10 +13223,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 631 OF 1240 ***
 
       // Wavefunction(s) for diagram number 631
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] );
+      helas_VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] );
 
       // Amplitude(s) for diagram number 631
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 631 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13239,10 +13243,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 632 OF 1240 ***
 
       // Wavefunction(s) for diagram number 632
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] );
+      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] );
 
       // Amplitude(s) for diagram number 632
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 632 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13256,7 +13260,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 633
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 633 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13270,7 +13274,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 634
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 634 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13290,7 +13294,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 635
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 635 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13306,7 +13310,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 636
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 636 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13326,7 +13330,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 637
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 637 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13342,7 +13346,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 638
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13351,7 +13355,7 @@ namespace mg5amcCpu
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13360,7 +13364,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13373,10 +13377,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 639 OF 1240 ***
 
       // Wavefunction(s) for diagram number 639
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 639
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 639 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13387,10 +13391,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 640 OF 1240 ***
 
       // Wavefunction(s) for diagram number 640
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 640
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 640 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13404,7 +13408,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 641
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 641 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13420,7 +13424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 642
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 642 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13431,10 +13435,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 643 OF 1240 ***
 
       // Wavefunction(s) for diagram number 643
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 643
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 643 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13448,7 +13452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 644
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 644 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13464,7 +13468,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 645
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 645 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13480,7 +13484,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 646
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 646 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13496,7 +13500,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 647
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 647 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13516,7 +13520,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 648
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 648 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13532,7 +13536,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 649
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 649 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13552,7 +13556,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 650
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 650 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13568,7 +13572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 651
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 651 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13588,7 +13592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 652
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 652 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13608,7 +13612,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 653
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 653 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13628,7 +13632,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 654
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -13645,7 +13649,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
@@ -13662,7 +13666,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -13683,10 +13687,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 655 OF 1240 ***
 
       // Wavefunction(s) for diagram number 655
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 655
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 655 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13711,10 +13715,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 656 OF 1240 ***
 
       // Wavefunction(s) for diagram number 656
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] );
+      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] );
 
       // Amplitude(s) for diagram number 656
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 656 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13742,7 +13746,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 657
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 657 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13770,7 +13774,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 658
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 658 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13790,7 +13794,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 659
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 659 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13806,7 +13810,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 660
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 660 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13822,7 +13826,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 661
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 661 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13842,7 +13846,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 662
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 662 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13858,7 +13862,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 663
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 663 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13874,7 +13878,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 664
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 664 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13890,7 +13894,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 665
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 665 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13910,7 +13914,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 666
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 666 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13926,7 +13930,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 667
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 667 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13946,7 +13950,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 668
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 668 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13966,7 +13970,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 669
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 669 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13986,7 +13990,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 670
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[19] += amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -14003,7 +14007,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[19] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -14020,7 +14024,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -14041,10 +14045,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 671 OF 1240 ***
 
       // Wavefunction(s) for diagram number 671
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 671
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 671 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14072,7 +14076,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 672
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 672 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14100,7 +14104,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 673
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 673 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14128,7 +14132,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 674
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 674 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14148,7 +14152,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 675
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 675 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14164,7 +14168,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 676
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 676 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14180,7 +14184,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 677
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 677 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14200,7 +14204,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 678
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 678 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14216,7 +14220,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 679
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 679 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14229,10 +14233,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 680 OF 1240 ***
 
       // Wavefunction(s) for diagram number 680
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 680
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 680 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14260,7 +14264,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 681
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 681 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14288,7 +14292,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 682
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -14305,7 +14309,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -14322,7 +14326,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -14346,7 +14350,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 683
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 683 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14374,7 +14378,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 684
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 684 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14402,7 +14406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 685
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[21] += amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -14419,7 +14423,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -14436,7 +14440,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -14460,7 +14464,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 686
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 686 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14488,7 +14492,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 687
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 687 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14516,7 +14520,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 688
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[23] += amp_sv[0];
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -14533,7 +14537,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -14550,7 +14554,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -14571,12 +14575,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 689 OF 1240 ***
 
       // Wavefunction(s) for diagram number 689
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 689
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -14593,7 +14597,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -14610,7 +14614,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -14631,12 +14635,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 690 OF 1240 ***
 
       // Wavefunction(s) for diagram number 690
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 690
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] += amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -14653,7 +14657,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -14670,7 +14674,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -14694,7 +14698,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 691
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -14711,7 +14715,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[23] += amp_sv[0];
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -14728,7 +14732,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[21] += amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -14752,7 +14756,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 692
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
@@ -14769,7 +14773,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
@@ -14786,7 +14790,7 @@ namespace mg5amcCpu
       jamp_sv[97] += amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -14810,7 +14814,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 693
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 693 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14838,7 +14842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 694
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 694 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14866,7 +14870,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 695
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 695 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14894,7 +14898,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 696
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 696 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14914,7 +14918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 697
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 697 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14930,7 +14934,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 698
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 698 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14944,7 +14948,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 699
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 699 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14958,7 +14962,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 700
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 700 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14974,7 +14978,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 701
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 701 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14994,7 +14998,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 702
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15003,7 +15007,7 @@ namespace mg5amcCpu
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15012,7 +15016,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15028,7 +15032,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 703
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 703 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15044,7 +15048,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 704
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 704 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15058,7 +15062,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 705
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 705 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15072,7 +15076,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 706
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 706 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15092,7 +15096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 707
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 707 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15108,7 +15112,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 708
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 708 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15122,7 +15126,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 709
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 709 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15136,7 +15140,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 710
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 710 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15152,7 +15156,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 711
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 711 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15172,7 +15176,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 712
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15181,7 +15185,7 @@ namespace mg5amcCpu
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15190,7 +15194,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15206,7 +15210,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 713
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 713 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15222,7 +15226,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 714
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 714 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15236,7 +15240,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 715
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 715 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15250,7 +15254,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 716
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 716 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15270,7 +15274,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 717
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 717 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15286,7 +15290,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 718
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 718 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15300,7 +15304,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 719
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 719 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15314,7 +15318,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 720
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 720 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15330,7 +15334,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 721
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 721 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15350,7 +15354,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 722
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15359,7 +15363,7 @@ namespace mg5amcCpu
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15368,7 +15372,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15384,7 +15388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 723
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 723 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15404,7 +15408,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 724
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 724 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15420,7 +15424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 725
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 725 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15434,7 +15438,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 726
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 726 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15448,7 +15452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 727
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 727 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15464,7 +15468,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 728
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 728 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15484,7 +15488,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 729
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15493,7 +15497,7 @@ namespace mg5amcCpu
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15502,7 +15506,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15518,7 +15522,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 730
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 730 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15538,7 +15542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 731
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 731 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15558,7 +15562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 732
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 732 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15578,7 +15582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 733
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 733 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15594,7 +15598,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 734
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 734 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15614,7 +15618,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 735
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 735 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15630,7 +15634,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 736
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15639,7 +15643,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15648,7 +15652,7 @@ namespace mg5amcCpu
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -15664,7 +15668,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 737
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15673,7 +15677,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -15682,7 +15686,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -15698,7 +15702,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 738
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -15715,7 +15719,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -15732,7 +15736,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
@@ -15753,10 +15757,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 739 OF 1240 ***
 
       // Wavefunction(s) for diagram number 739
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] );
+      helas_FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] );
 
       // Amplitude(s) for diagram number 739
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 739 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15769,7 +15773,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 740
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 740 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15779,10 +15783,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 741 OF 1240 ***
 
       // Wavefunction(s) for diagram number 741
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 741
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 741 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15795,7 +15799,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 742
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 742 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15808,7 +15812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 743
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 743 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15821,7 +15825,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 744
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 744 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15834,7 +15838,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 745
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 745 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15848,7 +15852,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 746
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 746 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15859,10 +15863,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 747 OF 1240 ***
 
       // Wavefunction(s) for diagram number 747
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] );
+      helas_VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 747
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 747 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15878,7 +15882,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 748
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 748 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15891,7 +15895,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 749
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 749 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15901,10 +15905,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 750 OF 1240 ***
 
       // Wavefunction(s) for diagram number 750
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 750
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 750 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15917,7 +15921,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 751
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 751 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15930,7 +15934,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 752
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 752 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15943,7 +15947,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 753
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 753 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15956,7 +15960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 754
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 754 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15970,7 +15974,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 755
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 755 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15981,10 +15985,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 756 OF 1240 ***
 
       // Wavefunction(s) for diagram number 756
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] );
+      helas_VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 756
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 756 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16000,7 +16004,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 757
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 757 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16013,7 +16017,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 758
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 758 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16023,10 +16027,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 759 OF 1240 ***
 
       // Wavefunction(s) for diagram number 759
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
 
       // Amplitude(s) for diagram number 759
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 759 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16039,7 +16043,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 760
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 760 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16052,7 +16056,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 761
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 761 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16065,7 +16069,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 762
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 762 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16078,7 +16082,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 763
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 763 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16092,7 +16096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 764
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 764 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16103,10 +16107,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 765 OF 1240 ***
 
       // Wavefunction(s) for diagram number 765
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] );
 
       // Amplitude(s) for diagram number 765
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 765 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16122,7 +16126,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 766
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 766 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16136,7 +16140,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 767
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 767 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16152,7 +16156,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 768
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 768 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16172,7 +16176,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 769
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 769 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16188,7 +16192,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 770
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 770 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16208,7 +16212,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 771
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 771 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16219,12 +16223,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 772 OF 1240 ***
 
       // Wavefunction(s) for diagram number 772
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 772
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
@@ -16233,7 +16237,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16242,7 +16246,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16258,7 +16262,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 773
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 773 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16272,7 +16276,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 774
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 774 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16288,7 +16292,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 775
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 775 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16308,7 +16312,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 776
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 776 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16324,7 +16328,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 777
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 777 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16344,7 +16348,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 778
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 778 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16355,12 +16359,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 779 OF 1240 ***
 
       // Wavefunction(s) for diagram number 779
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
 
       // Amplitude(s) for diagram number 779
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16369,7 +16373,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16378,7 +16382,7 @@ namespace mg5amcCpu
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16394,7 +16398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 780
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 780 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16408,7 +16412,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 781
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 781 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16424,7 +16428,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 782
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 782 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16444,7 +16448,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 783
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 783 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16460,7 +16464,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 784
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 784 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16480,7 +16484,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 785
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 785 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16491,12 +16495,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 786 OF 1240 ***
 
       // Wavefunction(s) for diagram number 786
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 786
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
@@ -16505,7 +16509,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16514,7 +16518,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16530,17 +16534,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 787
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -16549,12 +16553,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 788 OF 1240 ***
 
       // Wavefunction(s) for diagram number 788
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] );
+      helas_VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] );
+      helas_VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] );
 
       // Amplitude(s) for diagram number 788
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
@@ -16563,7 +16567,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
@@ -16572,7 +16576,7 @@ namespace mg5amcCpu
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16585,10 +16589,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 789 OF 1240 ***
 
       // Wavefunction(s) for diagram number 789
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 789
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 789 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16601,7 +16605,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 790
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 790 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16611,10 +16615,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 791 OF 1240 ***
 
       // Wavefunction(s) for diagram number 791
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 791
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 791 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16627,7 +16631,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 792
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 792 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16640,7 +16644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 793
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 793 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16653,7 +16657,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 794
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 794 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16666,7 +16670,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 795
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 795 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16680,7 +16684,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 796
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 796 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16694,7 +16698,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 797
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 797 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16710,7 +16714,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 798
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 798 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16723,7 +16727,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 799
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 799 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16733,10 +16737,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 800 OF 1240 ***
 
       // Wavefunction(s) for diagram number 800
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 800
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 800 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16749,7 +16753,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 801
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 801 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16762,7 +16766,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 802
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 802 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16775,7 +16779,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 803
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 803 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16788,7 +16792,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 804
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 804 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16802,7 +16806,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 805
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 805 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16816,7 +16820,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 806
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 806 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16832,7 +16836,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 807
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 807 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16845,7 +16849,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 808
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 808 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16855,10 +16859,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 809 OF 1240 ***
 
       // Wavefunction(s) for diagram number 809
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
+      helas_FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
 
       // Amplitude(s) for diagram number 809
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 809 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16871,7 +16875,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 810
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 810 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16884,7 +16888,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 811
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 811 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16897,7 +16901,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 812
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 812 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16910,7 +16914,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 813
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 813 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16924,7 +16928,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 814
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 814 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16938,7 +16942,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 815
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 815 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16954,7 +16958,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 816
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 816 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16968,7 +16972,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 817
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 817 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16984,7 +16988,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 818
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 818 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17004,7 +17008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 819
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 819 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17020,7 +17024,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 820
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 820 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17040,7 +17044,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 821
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 821 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17054,7 +17058,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 822
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
@@ -17063,7 +17067,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17072,7 +17076,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17088,7 +17092,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 823
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 823 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17102,7 +17106,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 824
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 824 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17118,7 +17122,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 825
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 825 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17138,7 +17142,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 826
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 826 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17154,7 +17158,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 827
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 827 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17174,7 +17178,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 828
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 828 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17188,7 +17192,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 829
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17197,7 +17201,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17206,7 +17210,7 @@ namespace mg5amcCpu
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17222,7 +17226,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 830
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 830 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17236,7 +17240,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 831
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 831 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17252,7 +17256,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 832
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 832 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17272,7 +17276,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 833
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 833 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17288,7 +17292,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 834
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 834 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17308,7 +17312,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 835
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 835 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17322,7 +17326,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 836
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
@@ -17331,7 +17335,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17340,7 +17344,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17356,17 +17360,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 837
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[64] += amp_sv[0];
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[64] -= amp_sv[0];
       jamp_sv[88] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
@@ -17378,7 +17382,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 838
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
@@ -17387,7 +17391,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
@@ -17396,7 +17400,7 @@ namespace mg5amcCpu
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17409,10 +17413,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 839 OF 1240 ***
 
       // Wavefunction(s) for diagram number 839
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] );
+      helas_VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] );
 
       // Amplitude(s) for diagram number 839
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 839 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17440,7 +17444,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 840
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 840 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17468,7 +17472,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 841
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -17485,7 +17489,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -17502,7 +17506,7 @@ namespace mg5amcCpu
       jamp_sv[115] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[6] += amp_sv[0];
@@ -17523,10 +17527,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 842 OF 1240 ***
 
       // Wavefunction(s) for diagram number 842
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] );
+      helas_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] );
 
       // Amplitude(s) for diagram number 842
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 842 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17554,7 +17558,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 843
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 843 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17582,7 +17586,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 844
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -17599,7 +17603,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -17616,7 +17620,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[6] += amp_sv[0];
@@ -17640,7 +17644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 845
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 845 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17668,7 +17672,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 846
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 846 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17693,12 +17697,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 847 OF 1240 ***
 
       // Wavefunction(s) for diagram number 847
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 847
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -17715,7 +17719,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -17732,7 +17736,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -17753,12 +17757,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 848 OF 1240 ***
 
       // Wavefunction(s) for diagram number 848
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 848
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
@@ -17775,7 +17779,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -17792,7 +17796,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[98] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -17813,12 +17817,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 849 OF 1240 ***
 
       // Wavefunction(s) for diagram number 849
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] );
 
       // Amplitude(s) for diagram number 849
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -17835,7 +17839,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -17852,7 +17856,7 @@ namespace mg5amcCpu
       jamp_sv[105] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -17873,12 +17877,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 850 OF 1240 ***
 
       // Wavefunction(s) for diagram number 850
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] );
 
       // Amplitude(s) for diagram number 850
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -17895,7 +17899,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -17912,7 +17916,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -17936,7 +17940,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 851
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -17953,7 +17957,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -17970,7 +17974,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -17994,7 +17998,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 852
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 852 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18022,7 +18026,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 853
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 853 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18050,7 +18054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 854
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 854 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18078,7 +18082,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 855
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 855 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18098,7 +18102,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 856
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 856 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18114,7 +18118,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 857
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 857 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18128,7 +18132,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 858
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 858 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18144,7 +18148,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 859
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 859 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18158,7 +18162,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 860
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 860 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18178,7 +18182,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 861
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
@@ -18187,7 +18191,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18196,7 +18200,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18212,7 +18216,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 862
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 862 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18228,7 +18232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 863
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 863 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18242,7 +18246,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 864
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 864 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18256,7 +18260,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 865
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 865 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18276,7 +18280,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 866
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 866 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18292,7 +18296,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 867
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 867 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18306,7 +18310,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 868
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 868 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18322,7 +18326,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 869
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 869 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18336,7 +18340,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 870
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 870 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18356,7 +18360,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 871
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
@@ -18365,7 +18369,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18374,7 +18378,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18390,7 +18394,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 872
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 872 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18406,7 +18410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 873
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 873 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18420,7 +18424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 874
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 874 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18434,7 +18438,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 875
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 875 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18454,7 +18458,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 876
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 876 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18470,7 +18474,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 877
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 877 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18484,7 +18488,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 878
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 878 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18500,7 +18504,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 879
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 879 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18514,7 +18518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 880
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 880 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18534,7 +18538,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 881
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
@@ -18543,7 +18547,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18552,7 +18556,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18568,7 +18572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 882
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 882 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18588,7 +18592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 883
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 883 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18604,7 +18608,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 884
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 884 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18618,7 +18622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 885
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 885 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18634,7 +18638,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 886
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 886 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18648,7 +18652,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 887
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 887 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18668,7 +18672,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 888
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -18677,7 +18681,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18686,7 +18690,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18702,7 +18706,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 889
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 889 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18722,7 +18726,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 890
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 890 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18742,7 +18746,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 891
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 891 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18762,7 +18766,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 892
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 892 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18782,7 +18786,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 893
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 893 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18798,7 +18802,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 894
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 894 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18811,10 +18815,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 895 OF 1240 ***
 
       // Wavefunction(s) for diagram number 895
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] );
+      helas_VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] );
 
       // Amplitude(s) for diagram number 895
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 895 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18842,7 +18846,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 896
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 896 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18870,7 +18874,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 897
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -18887,7 +18891,7 @@ namespace mg5amcCpu
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -18904,7 +18908,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -18928,7 +18932,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 898
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 898 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18956,7 +18960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 899
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 899 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18984,7 +18988,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 900
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -19001,7 +19005,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -19018,7 +19022,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[107] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -19042,7 +19046,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 901
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 901 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19070,7 +19074,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 902
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 902 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19095,12 +19099,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 903 OF 1240 ***
 
       // Wavefunction(s) for diagram number 903
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 903
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -19117,7 +19121,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -19134,7 +19138,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -19155,12 +19159,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 904 OF 1240 ***
 
       // Wavefunction(s) for diagram number 904
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] );
 
       // Amplitude(s) for diagram number 904
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
@@ -19177,7 +19181,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -19194,7 +19198,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -19215,12 +19219,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 905 OF 1240 ***
 
       // Wavefunction(s) for diagram number 905
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
 
       // Amplitude(s) for diagram number 905
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -19237,7 +19241,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -19254,7 +19258,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -19278,7 +19282,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 906
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -19295,7 +19299,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
@@ -19312,7 +19316,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -19336,7 +19340,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 907
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -19353,7 +19357,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -19370,7 +19374,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[20] += amp_sv[0];
@@ -19394,7 +19398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 908
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 908 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19422,7 +19426,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 909
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 909 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19450,7 +19454,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 910
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 910 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19478,7 +19482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 911
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 911 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19498,7 +19502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 912
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 912 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19514,7 +19518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 913
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 913 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19528,7 +19532,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 914
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 914 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19544,7 +19548,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 915
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 915 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19558,7 +19562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 916
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 916 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19578,7 +19582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 917
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
@@ -19587,7 +19591,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19596,7 +19600,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19612,7 +19616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 918
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 918 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19628,7 +19632,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 919
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 919 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19642,7 +19646,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 920
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 920 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19656,7 +19660,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 921
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 921 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19676,7 +19680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 922
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 922 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19692,7 +19696,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 923
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 923 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19706,7 +19710,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 924
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 924 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19722,7 +19726,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 925
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 925 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19736,7 +19740,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 926
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 926 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19756,7 +19760,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 927
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19765,7 +19769,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19774,7 +19778,7 @@ namespace mg5amcCpu
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19790,7 +19794,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 928
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 928 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19806,7 +19810,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 929
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 929 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19820,7 +19824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 930
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 930 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19834,7 +19838,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 931
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 931 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19854,7 +19858,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 932
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 932 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19870,7 +19874,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 933
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 933 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19884,7 +19888,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 934
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 934 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19900,7 +19904,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 935
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 935 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19914,7 +19918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 936
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 936 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19934,7 +19938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 937
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
@@ -19943,7 +19947,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19952,7 +19956,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19968,7 +19972,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 938
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 938 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19988,7 +19992,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 939
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 939 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20004,7 +20008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 940
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 940 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20018,7 +20022,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 941
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 941 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20034,7 +20038,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 942
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 942 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20048,7 +20052,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 943
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 943 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20068,7 +20072,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 944
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -20077,7 +20081,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -20086,7 +20090,7 @@ namespace mg5amcCpu
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -20102,7 +20106,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 945
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 945 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20122,7 +20126,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 946
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 946 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20142,7 +20146,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 947
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 947 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20162,7 +20166,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 948
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 948 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20182,7 +20186,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 949
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 949 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20198,7 +20202,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 950
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 950 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20211,10 +20215,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 951 OF 1240 ***
 
       // Wavefunction(s) for diagram number 951
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 951
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 951 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20242,7 +20246,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 952
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 952 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20270,7 +20274,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 953
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -20287,7 +20291,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -20304,7 +20308,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -20328,7 +20332,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 954
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 954 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20356,7 +20360,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 955
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 955 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20384,7 +20388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 956
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -20401,7 +20405,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -20418,7 +20422,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -20442,7 +20446,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 957
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 957 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20470,7 +20474,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 958
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 958 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20495,12 +20499,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 959 OF 1240 ***
 
       // Wavefunction(s) for diagram number 959
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 959
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -20517,7 +20521,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -20534,7 +20538,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -20555,12 +20559,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 960 OF 1240 ***
 
       // Wavefunction(s) for diagram number 960
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] );
 
       // Amplitude(s) for diagram number 960
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -20577,7 +20581,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -20594,7 +20598,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -20618,7 +20622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 961
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -20635,7 +20639,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
@@ -20652,7 +20656,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -20676,7 +20680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 962
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -20693,7 +20697,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
@@ -20710,7 +20714,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -20734,7 +20738,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 963
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
@@ -20751,7 +20755,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -20768,7 +20772,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[14] += amp_sv[0];
@@ -20792,7 +20796,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 964
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 964 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20820,7 +20824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 965
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 965 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20848,7 +20852,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 966
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 966 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20876,7 +20880,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 967
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 967 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20896,7 +20900,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 968
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 968 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20912,7 +20916,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 969
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 969 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20926,7 +20930,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 970
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 970 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20942,7 +20946,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 971
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 971 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20956,7 +20960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 972
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 972 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20976,7 +20980,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 973
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -20985,7 +20989,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -20994,7 +20998,7 @@ namespace mg5amcCpu
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21010,7 +21014,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 974
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 974 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21026,7 +21030,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 975
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 975 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21040,7 +21044,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 976
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 976 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21054,7 +21058,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 977
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 977 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21074,7 +21078,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 978
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 978 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21090,7 +21094,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 979
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 979 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21104,7 +21108,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 980
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 980 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21120,7 +21124,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 981
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 981 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21134,7 +21138,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 982
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 982 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21154,7 +21158,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 983
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21163,7 +21167,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21172,7 +21176,7 @@ namespace mg5amcCpu
       jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21188,7 +21192,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 984
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 984 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21204,7 +21208,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 985
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 985 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21218,7 +21222,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 986
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 986 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21232,7 +21236,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 987
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 987 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21252,7 +21256,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 988
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 988 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21268,7 +21272,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 989
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 989 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21282,7 +21286,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 990
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 990 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21298,7 +21302,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 991
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 991 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21312,7 +21316,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 992
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 992 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21332,7 +21336,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 993
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21341,7 +21345,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21350,7 +21354,7 @@ namespace mg5amcCpu
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21366,7 +21370,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 994
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 994 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21386,7 +21390,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 995
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 995 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21402,7 +21406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 996
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 996 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21416,7 +21420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 997
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 997 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21432,7 +21436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 998
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 998 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21446,7 +21450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 999
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 999 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21466,7 +21470,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1000
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21475,7 +21479,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21484,7 +21488,7 @@ namespace mg5amcCpu
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21500,7 +21504,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1001
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1001 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21520,7 +21524,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1002
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1002 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21540,7 +21544,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1003
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1003 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21560,7 +21564,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1004
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1004 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21580,7 +21584,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1005
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1005 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21596,7 +21600,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1006
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1006 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21612,7 +21616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1007
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1007 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21640,7 +21644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1008
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1008 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21668,7 +21672,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1009
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -21685,7 +21689,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -21702,7 +21706,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -21726,7 +21730,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1010
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1010 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21754,7 +21758,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1011
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1011 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21782,7 +21786,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1012
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -21799,7 +21803,7 @@ namespace mg5amcCpu
       jamp_sv[101] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[14] += amp_sv[0];
@@ -21816,7 +21820,7 @@ namespace mg5amcCpu
       jamp_sv[103] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -21840,7 +21844,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1013
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1013 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21868,7 +21872,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1014
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1014 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21893,12 +21897,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1015 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1015
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] );
 
       // Amplitude(s) for diagram number 1015
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
@@ -21915,7 +21919,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -21932,7 +21936,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -21953,12 +21957,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1016 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1016
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 1016
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -21975,7 +21979,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -21992,7 +21996,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -22016,7 +22020,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1017
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -22033,7 +22037,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -22050,7 +22054,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -22074,7 +22078,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1018
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -22091,7 +22095,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[21] += amp_sv[0];
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
@@ -22108,7 +22112,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -22132,7 +22136,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1019
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1019 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22160,7 +22164,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1020
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1020 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22188,7 +22192,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1021
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -22205,7 +22209,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -22222,7 +22226,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -22246,7 +22250,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1022
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1022 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22274,7 +22278,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1023
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1023 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22302,7 +22306,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1024
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -22319,7 +22323,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[20] += amp_sv[0];
@@ -22336,7 +22340,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[25] += amp_sv[0];
@@ -22360,7 +22364,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1025
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1025 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22388,7 +22392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1026
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1026 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22416,7 +22420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1027
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -22433,7 +22437,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -22450,7 +22454,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -22471,12 +22475,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1028 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1028
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1028
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -22493,7 +22497,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[25] += amp_sv[0];
@@ -22510,7 +22514,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
       jamp_sv[25] += amp_sv[0];
@@ -22534,7 +22538,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1029
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -22551,7 +22555,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -22568,7 +22572,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -22592,7 +22596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1030
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
@@ -22609,7 +22613,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
@@ -22626,7 +22630,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -22650,7 +22654,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1031
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1031 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22678,7 +22682,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1032
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1032 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22706,7 +22710,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1033
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -22723,7 +22727,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -22740,7 +22744,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[3] += amp_sv[0];
@@ -22764,7 +22768,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1034
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1034 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22792,7 +22796,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1035
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1035 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22820,7 +22824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1036
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -22837,7 +22841,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -22854,7 +22858,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
       jamp_sv[27] += amp_sv[0];
@@ -22878,7 +22882,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1037
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1037 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22906,7 +22910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1038
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1038 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22934,7 +22938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1039
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
@@ -22951,7 +22955,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[3] += amp_sv[0];
@@ -22968,7 +22972,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[3] += amp_sv[0];
@@ -22989,12 +22993,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1040 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1040
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 1040
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -23011,7 +23015,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
       jamp_sv[27] += amp_sv[0];
@@ -23028,7 +23032,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[90] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[27] += amp_sv[0];
@@ -23052,7 +23056,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1041
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -23069,7 +23073,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -23086,7 +23090,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -23110,7 +23114,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1042
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -23127,7 +23131,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -23144,7 +23148,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -23168,7 +23172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1043
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23185,7 +23189,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23202,7 +23206,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -23219,7 +23223,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23236,7 +23240,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23253,7 +23257,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -23270,7 +23274,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -23287,7 +23291,7 @@ namespace mg5amcCpu
       jamp_sv[113] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -23304,7 +23308,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
@@ -23328,7 +23332,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1044
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23345,7 +23349,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23362,7 +23366,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -23386,7 +23390,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1045
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -23403,7 +23407,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -23420,7 +23424,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
@@ -23444,7 +23448,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1046
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1046 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23457,7 +23461,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1047
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1047 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23470,7 +23474,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1048
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1048 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23483,7 +23487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1049
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1049 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23496,7 +23500,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1050
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1050 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23509,7 +23513,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1051
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1051 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23522,7 +23526,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1052
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1052 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23535,7 +23539,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1053
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1053 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23548,7 +23552,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1054
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1054 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23561,7 +23565,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1055
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1055 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23574,7 +23578,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1056
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1056 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23587,7 +23591,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1057
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1057 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23600,7 +23604,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1058
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1058 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23616,7 +23620,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1059
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1059 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23630,7 +23634,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1060
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1060 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23646,7 +23650,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1061
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1061 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23666,7 +23670,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1062
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1062 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23680,7 +23684,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1063
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1063 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23700,7 +23704,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1064
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
@@ -23709,7 +23713,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -23718,7 +23722,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -23734,7 +23738,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1065
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1065 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23747,7 +23751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1066
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1066 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23760,7 +23764,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1067
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1067 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23773,7 +23777,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1068
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1068 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23786,7 +23790,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1069
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1069 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23799,7 +23803,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1070
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1070 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23812,7 +23816,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1071
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1071 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23825,7 +23829,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1072
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1072 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23838,7 +23842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1073
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1073 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23851,7 +23855,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1074
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1074 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23864,7 +23868,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1075
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1075 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23877,7 +23881,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1076
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1076 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23890,7 +23894,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1077
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1077 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23906,7 +23910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1078
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1078 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23920,7 +23924,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1079
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1079 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23936,7 +23940,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1080
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1080 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23956,7 +23960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1081
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1081 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23970,7 +23974,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1082
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1082 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23990,7 +23994,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1083
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
@@ -23999,7 +24003,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24008,7 +24012,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24024,7 +24028,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1084
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1084 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24037,7 +24041,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1085
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1085 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24050,7 +24054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1086
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1086 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24063,7 +24067,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1087
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1087 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24076,7 +24080,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1088
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1088 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24089,7 +24093,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1089
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1089 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24102,7 +24106,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1090
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1090 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24115,7 +24119,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1091
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1091 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24128,7 +24132,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1092
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1092 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24141,7 +24145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1093
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1093 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24154,7 +24158,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1094
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1094 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24167,7 +24171,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1095
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1095 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24180,7 +24184,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1096
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1096 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24196,7 +24200,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1097
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1097 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24210,7 +24214,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1098
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1098 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24226,7 +24230,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1099
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1099 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24246,7 +24250,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1100
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1100 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24260,7 +24264,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1101
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1101 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24280,7 +24284,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1102
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
@@ -24289,7 +24293,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24298,7 +24302,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24314,7 +24318,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1103
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1103 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24330,7 +24334,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1104
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1104 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24344,7 +24348,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1105
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1105 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24360,7 +24364,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1106
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1106 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24380,7 +24384,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1107
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1107 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24394,7 +24398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1108
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1108 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24414,7 +24418,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1109
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
@@ -24423,7 +24427,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24432,7 +24436,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24448,7 +24452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1110
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1110 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24464,7 +24468,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1111
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1111 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24478,7 +24482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1112
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1112 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24494,7 +24498,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1113
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1113 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24514,7 +24518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1114
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1114 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24528,7 +24532,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1115
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1115 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24548,7 +24552,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1116
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -24557,7 +24561,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24566,7 +24570,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24582,7 +24586,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1117
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1117 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24598,7 +24602,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1118
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1118 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24612,7 +24616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1119
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1119 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24628,7 +24632,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1120
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1120 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24648,7 +24652,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1121
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1121 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24662,7 +24666,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1122
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1122 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24682,7 +24686,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1123
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -24691,7 +24695,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24700,7 +24704,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24713,12 +24717,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1124 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1124
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] );
 
       // Amplitude(s) for diagram number 1124
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -24735,7 +24739,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -24752,7 +24756,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -24769,7 +24773,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -24786,7 +24790,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -24803,7 +24807,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -24820,7 +24824,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -24837,7 +24841,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -24854,7 +24858,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -24875,12 +24879,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1125 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1125
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] );
 
       // Amplitude(s) for diagram number 1125
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -24897,7 +24901,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -24914,7 +24918,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -24935,12 +24939,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1126 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1126
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] );
+      helas_VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] );
+      helas_VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1126
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -24957,7 +24961,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -24974,7 +24978,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -24998,7 +25002,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1127
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -25015,7 +25019,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -25032,7 +25036,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -25053,22 +25057,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1128 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1128
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
 
       // Amplitude(s) for diagram number 1128
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[90] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[94] += amp_sv[0];
@@ -25080,7 +25084,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1129
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25089,7 +25093,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25098,7 +25102,7 @@ namespace mg5amcCpu
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25114,17 +25118,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1130
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += amp_sv[0];
       jamp_sv[74] -= amp_sv[0];
       jamp_sv[80] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[74] -= amp_sv[0];
       jamp_sv[78] += amp_sv[0];
       jamp_sv[80] -= amp_sv[0];
       jamp_sv[84] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= amp_sv[0];
       jamp_sv[78] += amp_sv[0];
       jamp_sv[84] += amp_sv[0];
@@ -25136,17 +25140,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1131
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
@@ -25158,7 +25162,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1132
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25167,7 +25171,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25176,7 +25180,7 @@ namespace mg5amcCpu
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25192,17 +25196,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1133
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += amp_sv[0];
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
@@ -25211,22 +25215,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1134 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1134
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
 
       // Amplitude(s) for diagram number 1134
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[55] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[49] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -25238,7 +25242,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1135
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25247,7 +25251,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25256,7 +25260,7 @@ namespace mg5amcCpu
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25272,17 +25276,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1136
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
       jamp_sv[54] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
       jamp_sv[48] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -25294,7 +25298,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1137
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25303,7 +25307,7 @@ namespace mg5amcCpu
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25312,7 +25316,7 @@ namespace mg5amcCpu
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25328,7 +25332,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1138
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25337,7 +25341,7 @@ namespace mg5amcCpu
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25346,7 +25350,7 @@ namespace mg5amcCpu
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25362,7 +25366,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1139
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25371,7 +25375,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25380,7 +25384,7 @@ namespace mg5amcCpu
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25393,12 +25397,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1140 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1140
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 1140
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -25415,7 +25419,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -25432,7 +25436,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -25449,7 +25453,7 @@ namespace mg5amcCpu
       jamp_sv[100] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -25466,7 +25470,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -25483,7 +25487,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -25500,7 +25504,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -25517,7 +25521,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -25534,7 +25538,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -25555,12 +25559,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1141 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1141
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 1141
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -25577,7 +25581,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -25594,7 +25598,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -25615,12 +25619,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1142 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1142
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 1142
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -25637,7 +25641,7 @@ namespace mg5amcCpu
       jamp_sv[100] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -25654,7 +25658,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -25678,7 +25682,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1143
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -25695,7 +25699,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -25712,7 +25716,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -25733,22 +25737,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1144 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1144
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] );
+      helas_FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 1144
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[66] += amp_sv[0];
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[71] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[68] += amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[70] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[66] -= amp_sv[0];
       jamp_sv[68] += amp_sv[0];
       jamp_sv[70] += amp_sv[0];
@@ -25760,7 +25764,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1145
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25769,7 +25773,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25778,7 +25782,7 @@ namespace mg5amcCpu
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25794,17 +25798,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1146
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += amp_sv[0];
       jamp_sv[50] -= amp_sv[0];
       jamp_sv[56] -= amp_sv[0];
       jamp_sv[62] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[50] -= amp_sv[0];
       jamp_sv[54] += amp_sv[0];
       jamp_sv[56] -= amp_sv[0];
       jamp_sv[60] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= amp_sv[0];
       jamp_sv[54] += amp_sv[0];
       jamp_sv[60] += amp_sv[0];
@@ -25816,17 +25820,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1147
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[108] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
@@ -25838,7 +25842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1148
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25847,7 +25851,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25856,7 +25860,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25872,17 +25876,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1149
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] += amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
@@ -25891,22 +25895,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1150 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1150
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
+      helas_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
 
       // Amplitude(s) for diagram number 1150
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[79] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[73] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[73] += amp_sv[0];
@@ -25918,7 +25922,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1151
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25927,7 +25931,7 @@ namespace mg5amcCpu
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25936,7 +25940,7 @@ namespace mg5amcCpu
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25952,17 +25956,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1152
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
       jamp_sv[78] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
       jamp_sv[72] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[72] += amp_sv[0];
@@ -25974,7 +25978,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1153
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25983,7 +25987,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25992,7 +25996,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26008,7 +26012,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1154
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26017,7 +26021,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26026,7 +26030,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26042,7 +26046,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1155
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26051,7 +26055,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26060,7 +26064,7 @@ namespace mg5amcCpu
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26073,12 +26077,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1156 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1156
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1156
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
@@ -26095,7 +26099,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -26112,7 +26116,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -26129,7 +26133,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -26146,7 +26150,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -26163,7 +26167,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -26180,7 +26184,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -26197,7 +26201,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -26214,7 +26218,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -26235,12 +26239,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1157 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1157
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 1157
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -26257,7 +26261,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -26274,7 +26278,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -26295,12 +26299,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1158 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1158
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] );
+      helas_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 1158
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -26317,7 +26321,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -26334,7 +26338,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -26358,7 +26362,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1159
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
@@ -26375,7 +26379,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -26392,7 +26396,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -26413,22 +26417,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1160 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1160
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 1160
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[60] += amp_sv[0];
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[65] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[62] += amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[64] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[60] -= amp_sv[0];
       jamp_sv[62] += amp_sv[0];
       jamp_sv[64] += amp_sv[0];
@@ -26440,7 +26444,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1161
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26449,7 +26453,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26458,7 +26462,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26474,17 +26478,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1162
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] += amp_sv[0];
       jamp_sv[52] -= amp_sv[0];
       jamp_sv[58] -= amp_sv[0];
       jamp_sv[68] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[52] -= amp_sv[0];
       jamp_sv[55] += amp_sv[0];
       jamp_sv[58] -= amp_sv[0];
       jamp_sv[66] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] -= amp_sv[0];
       jamp_sv[55] += amp_sv[0];
       jamp_sv[66] += amp_sv[0];
@@ -26496,17 +26500,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1163
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[84] += amp_sv[0];
       jamp_sv[85] -= amp_sv[0];
       jamp_sv[87] -= amp_sv[0];
       jamp_sv[89] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[85] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
       jamp_sv[87] -= amp_sv[0];
       jamp_sv[88] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[84] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
       jamp_sv[88] += amp_sv[0];
@@ -26518,7 +26522,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1164
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26527,7 +26531,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26536,7 +26540,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26552,17 +26556,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1165
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] += amp_sv[0];
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[79] += amp_sv[0];
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[90] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] -= amp_sv[0];
       jamp_sv[79] += amp_sv[0];
       jamp_sv[90] += amp_sv[0];
@@ -26571,22 +26575,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1166 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1166
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
 
       // Amplitude(s) for diagram number 1166
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
@@ -26598,7 +26602,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1167
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26607,7 +26611,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26616,7 +26620,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26632,17 +26636,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1168
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
@@ -26654,7 +26658,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1169
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26663,7 +26667,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26672,7 +26676,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26688,7 +26692,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1170
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26697,7 +26701,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26706,7 +26710,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26722,7 +26726,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1171
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26731,7 +26735,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26740,7 +26744,7 @@ namespace mg5amcCpu
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26753,25 +26757,25 @@ namespace mg5amcCpu
       // *** DIAGRAM 1172 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1172
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] );
+      helas_FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 1172
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[42] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[47] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[44] += amp_sv[0];
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[46] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[42] -= amp_sv[0];
       jamp_sv[44] += amp_sv[0];
       jamp_sv[46] += amp_sv[0];
@@ -26780,12 +26784,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1173 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1173
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
 
       // Amplitude(s) for diagram number 1173
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26794,7 +26798,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26803,7 +26807,7 @@ namespace mg5amcCpu
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26819,17 +26823,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1174
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[32] -= amp_sv[0];
       jamp_sv[38] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
       jamp_sv[32] -= amp_sv[0];
       jamp_sv[36] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -26838,22 +26842,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1175 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1175
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 1175
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[85] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[51] += amp_sv[0];
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[75] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[51] += amp_sv[0];
       jamp_sv[75] += amp_sv[0];
@@ -26865,7 +26869,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1176
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26874,7 +26878,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26883,7 +26887,7 @@ namespace mg5amcCpu
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26899,17 +26903,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1177
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
       jamp_sv[115] += amp_sv[0];
@@ -26921,7 +26925,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1178
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26930,7 +26934,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26939,7 +26943,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26955,7 +26959,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1179
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26964,7 +26968,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26973,7 +26977,7 @@ namespace mg5amcCpu
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26989,7 +26993,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1180
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[14] += amp_sv[0];
@@ -27006,7 +27010,7 @@ namespace mg5amcCpu
       jamp_sv[103] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -27023,7 +27027,7 @@ namespace mg5amcCpu
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -27047,7 +27051,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1181
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -27064,7 +27068,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
@@ -27081,7 +27085,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -27098,7 +27102,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -27115,7 +27119,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -27132,7 +27136,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[15] += amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -27149,7 +27153,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -27166,7 +27170,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -27183,7 +27187,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -27204,12 +27208,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1182 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1182
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 1182
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -27226,7 +27230,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -27243,7 +27247,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -27267,7 +27271,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1183
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -27284,7 +27288,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[15] += amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -27301,7 +27305,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -27325,7 +27329,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1184
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27334,7 +27338,7 @@ namespace mg5amcCpu
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27343,7 +27347,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27359,17 +27363,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1185
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
@@ -27381,7 +27385,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1186
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27390,7 +27394,7 @@ namespace mg5amcCpu
       jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27399,7 +27403,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27415,17 +27419,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1187
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[60] -= amp_sv[0];
       jamp_sv[84] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[50] += amp_sv[0];
       jamp_sv[60] -= amp_sv[0];
       jamp_sv[74] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[50] += amp_sv[0];
       jamp_sv[74] += amp_sv[0];
@@ -27434,25 +27438,25 @@ namespace mg5amcCpu
       // *** DIAGRAM 1188 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1188
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] );
+      helas_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
+      helas_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
 
       // Amplitude(s) for diagram number 1188
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[36] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[41] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[38] += amp_sv[0];
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[40] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[36] -= amp_sv[0];
       jamp_sv[38] += amp_sv[0];
       jamp_sv[40] += amp_sv[0];
@@ -27461,12 +27465,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1189 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1189
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 1189
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27475,7 +27479,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27484,7 +27488,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -27500,17 +27504,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1190
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] += amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[34] -= amp_sv[0];
       jamp_sv[44] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
       jamp_sv[34] -= amp_sv[0];
       jamp_sv[42] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -27519,22 +27523,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1191 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1191
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
+      helas_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 1191
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[53] += amp_sv[0];
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[53] += amp_sv[0];
       jamp_sv[99] += amp_sv[0];
@@ -27546,7 +27550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1192
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27555,7 +27559,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27564,7 +27568,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -27580,17 +27584,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1193
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] += amp_sv[0];
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[85] += amp_sv[0];
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] -= amp_sv[0];
       jamp_sv[85] += amp_sv[0];
       jamp_sv[91] += amp_sv[0];
@@ -27602,7 +27606,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1194
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27611,7 +27615,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -27620,7 +27624,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -27636,7 +27640,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1195
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27645,7 +27649,7 @@ namespace mg5amcCpu
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27654,7 +27658,7 @@ namespace mg5amcCpu
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -27670,7 +27674,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1196
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[20] += amp_sv[0];
@@ -27687,7 +27691,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -27704,7 +27708,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -27728,7 +27732,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1197
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -27745,7 +27749,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
@@ -27762,7 +27766,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
       jamp_sv[25] += amp_sv[0];
@@ -27779,7 +27783,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -27796,7 +27800,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -27813,7 +27817,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[21] += amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -27830,7 +27834,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -27847,7 +27851,7 @@ namespace mg5amcCpu
       jamp_sv[94] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -27864,7 +27868,7 @@ namespace mg5amcCpu
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -27885,12 +27889,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1198 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1198
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] );
+      helas_VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 1198
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -27907,7 +27911,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -27924,7 +27928,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -27948,7 +27952,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1199
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
       jamp_sv[25] += amp_sv[0];
@@ -27965,7 +27969,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[21] += amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -27982,7 +27986,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -28006,7 +28010,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1200
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28015,7 +28019,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28024,7 +28028,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28040,17 +28044,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1201
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[78] += amp_sv[0];
       jamp_sv[79] -= amp_sv[0];
       jamp_sv[81] -= amp_sv[0];
       jamp_sv[83] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[79] -= amp_sv[0];
       jamp_sv[80] += amp_sv[0];
       jamp_sv[81] -= amp_sv[0];
       jamp_sv[82] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[78] -= amp_sv[0];
       jamp_sv[80] += amp_sv[0];
       jamp_sv[82] += amp_sv[0];
@@ -28062,7 +28066,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1202
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28071,7 +28075,7 @@ namespace mg5amcCpu
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28080,7 +28084,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28096,17 +28100,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1203
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[66] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[52] += amp_sv[0];
       jamp_sv[66] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[52] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
@@ -28115,25 +28119,25 @@ namespace mg5amcCpu
       // *** DIAGRAM 1204 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1204
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] );
+      helas_FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
 
       // Amplitude(s) for diagram number 1204
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[30] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[35] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[32] += amp_sv[0];
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[34] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[30] -= amp_sv[0];
       jamp_sv[32] += amp_sv[0];
       jamp_sv[34] += amp_sv[0];
@@ -28142,12 +28146,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1205 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1205
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 1205
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28156,7 +28160,7 @@ namespace mg5amcCpu
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28165,7 +28169,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28181,17 +28185,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1206
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] += amp_sv[0];
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[40] -= amp_sv[0];
       jamp_sv[46] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[37] += amp_sv[0];
       jamp_sv[40] -= amp_sv[0];
       jamp_sv[43] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[37] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -28200,22 +28204,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1207 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1207
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
 
       // Amplitude(s) for diagram number 1207
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[77] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[77] += amp_sv[0];
       jamp_sv[101] += amp_sv[0];
@@ -28227,7 +28231,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1208
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28236,7 +28240,7 @@ namespace mg5amcCpu
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28245,7 +28249,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28261,17 +28265,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1209
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] += amp_sv[0];
       jamp_sv[53] -= amp_sv[0];
       jamp_sv[64] -= amp_sv[0];
       jamp_sv[70] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[53] -= amp_sv[0];
       jamp_sv[61] += amp_sv[0];
       jamp_sv[64] -= amp_sv[0];
       jamp_sv[67] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] -= amp_sv[0];
       jamp_sv[61] += amp_sv[0];
       jamp_sv[67] += amp_sv[0];
@@ -28283,7 +28287,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1210
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28292,7 +28296,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -28301,7 +28305,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -28317,7 +28321,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1211
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28326,7 +28330,7 @@ namespace mg5amcCpu
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28335,7 +28339,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -28351,7 +28355,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1212
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -28368,7 +28372,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[22] += amp_sv[0];
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -28385,7 +28389,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -28409,7 +28413,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1213
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] += amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -28426,7 +28430,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
@@ -28443,7 +28447,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[27] += amp_sv[0];
@@ -28460,7 +28464,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -28477,7 +28481,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -28494,7 +28498,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[23] += amp_sv[0];
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -28511,7 +28515,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -28528,7 +28532,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -28545,7 +28549,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -28566,12 +28570,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1214 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1214
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] );
+      helas_VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 1214
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] += amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -28588,7 +28592,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -28605,7 +28609,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -28629,7 +28633,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1215
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[27] += amp_sv[0];
@@ -28646,7 +28650,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[23] += amp_sv[0];
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -28663,7 +28667,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -28687,7 +28691,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1216
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28696,7 +28700,7 @@ namespace mg5amcCpu
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28705,7 +28709,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28721,17 +28725,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1217
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[54] += amp_sv[0];
       jamp_sv[55] -= amp_sv[0];
       jamp_sv[57] -= amp_sv[0];
       jamp_sv[59] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[55] -= amp_sv[0];
       jamp_sv[56] += amp_sv[0];
       jamp_sv[57] -= amp_sv[0];
       jamp_sv[58] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[54] -= amp_sv[0];
       jamp_sv[56] += amp_sv[0];
       jamp_sv[58] += amp_sv[0];
@@ -28743,7 +28747,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1218
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28752,7 +28756,7 @@ namespace mg5amcCpu
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28761,7 +28765,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28777,17 +28781,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1219
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] += amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[76] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
@@ -28799,7 +28803,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1220
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28816,7 +28820,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28833,7 +28837,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -28850,7 +28854,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28867,7 +28871,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28884,7 +28888,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[19] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -28901,7 +28905,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -28918,7 +28922,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -28935,7 +28939,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
@@ -28956,12 +28960,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1221 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1221
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] );
+      helas_VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 1221
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28978,7 +28982,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28995,7 +28999,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -29019,7 +29023,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1222
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29036,7 +29040,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29053,7 +29057,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -29077,7 +29081,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1223
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29086,7 +29090,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29095,7 +29099,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29111,17 +29115,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1224
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
@@ -29133,7 +29137,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1225
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29142,7 +29146,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29151,7 +29155,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29167,17 +29171,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1226
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[32] += amp_sv[0];
       jamp_sv[38] -= amp_sv[0];
       jamp_sv[62] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[38] -= amp_sv[0];
       jamp_sv[56] += amp_sv[0];
       jamp_sv[62] -= amp_sv[0];
       jamp_sv[80] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[32] -= amp_sv[0];
       jamp_sv[56] += amp_sv[0];
       jamp_sv[80] += amp_sv[0];
@@ -29189,7 +29193,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1227
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29206,7 +29210,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29223,7 +29227,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -29240,7 +29244,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29257,7 +29261,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29274,7 +29278,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[13] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -29291,7 +29295,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -29308,7 +29312,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -29325,7 +29329,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -29346,12 +29350,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1228 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1228
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] );
+      helas_VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] );
+      helas_VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] );
 
       // Amplitude(s) for diagram number 1228
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29368,7 +29372,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29385,7 +29389,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -29409,7 +29413,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1229
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29426,7 +29430,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29443,7 +29447,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -29467,7 +29471,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1230
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29476,7 +29480,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29485,7 +29489,7 @@ namespace mg5amcCpu
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29501,17 +29505,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1231
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += amp_sv[0];
       jamp_sv[73] -= amp_sv[0];
       jamp_sv[75] -= amp_sv[0];
       jamp_sv[77] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] -= amp_sv[0];
       jamp_sv[74] += amp_sv[0];
       jamp_sv[75] -= amp_sv[0];
       jamp_sv[76] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= amp_sv[0];
       jamp_sv[74] += amp_sv[0];
       jamp_sv[76] += amp_sv[0];
@@ -29523,7 +29527,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1232
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29532,7 +29536,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29541,7 +29545,7 @@ namespace mg5amcCpu
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29557,17 +29561,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1233
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[34] += amp_sv[0];
       jamp_sv[44] -= amp_sv[0];
       jamp_sv[68] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[44] -= amp_sv[0];
       jamp_sv[58] += amp_sv[0];
       jamp_sv[68] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[34] -= amp_sv[0];
       jamp_sv[58] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
@@ -29579,7 +29583,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1234
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -29596,7 +29600,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -29613,7 +29617,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -29630,7 +29634,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -29647,7 +29651,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29664,7 +29668,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[7] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -29681,7 +29685,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -29698,7 +29702,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29715,7 +29719,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29736,12 +29740,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1235 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1235
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] );
+      helas_VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] );
+      helas_VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] );
 
       // Amplitude(s) for diagram number 1235
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -29758,7 +29762,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -29775,7 +29779,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -29799,7 +29803,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1236
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -29816,7 +29820,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29833,7 +29837,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29857,7 +29861,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1237
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29866,7 +29870,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29875,7 +29879,7 @@ namespace mg5amcCpu
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29891,17 +29895,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1238
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += amp_sv[0];
       jamp_sv[49] -= amp_sv[0];
       jamp_sv[51] -= amp_sv[0];
       jamp_sv[53] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] -= amp_sv[0];
       jamp_sv[50] += amp_sv[0];
       jamp_sv[51] -= amp_sv[0];
       jamp_sv[52] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= amp_sv[0];
       jamp_sv[50] += amp_sv[0];
       jamp_sv[52] += amp_sv[0];
@@ -29913,7 +29917,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1239
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29922,7 +29926,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29931,7 +29935,7 @@ namespace mg5amcCpu
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29947,17 +29951,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1240
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[40] += amp_sv[0];
       jamp_sv[46] -= amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[46] -= amp_sv[0];
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[40] -= amp_sv[0];
       jamp_sv[82] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h
index bcf4333c78..24e8114e3a 100644
--- a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h
@@ -1400,8 +1400,189 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1_0 linker_VVV1_0
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVVV1_0 linker_VVVV1_0
+#define helas_VVVV1P0_1 linker_VVVV1P0_1
+#define helas_VVVV3_0 linker_VVVV3_0
+#define helas_VVVV3P0_1 linker_VVVV3P0_1
+#define helas_VVVV4_0 linker_VVVV4_0
+#define helas_VVVV4P0_1 linker_VVVV4P0_1
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_config.mk b/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or defined MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
index 70ece972f5..404d5a1549 100644
--- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005778312683105469 [0m
+[1;32mDEBUG: model prefixing  takes 0.0058324337005615234 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.872 s
+1 processes with 1240 diagrams generated in 1.936 s
 Total: 1 processes with 1240 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg
 Load PLUGIN.CUDACPP_OUTPUT
@@ -178,14 +178,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.585 s
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.790 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.348 s
+ALOHA: aloha creates 5 routines in  0.365 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -200,6 +200,8 @@ ALOHA: aloha creates 5 routines in  0.348 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h
@@ -208,7 +210,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
 quit
 
-real	0m13.103s
-user	0m12.928s
-sys	0m0.109s
-Code generation completed in 14 seconds
+real	0m13.502s
+user	0m13.358s
+sys	0m0.089s
+Code generation completed in 13 seconds
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc
index 85f808d59d..4879802d7f 100644
--- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -334,13 +338,13 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][6], +1, w_fp[6], 6 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 1
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -364,10 +368,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 1240 ***
 
       // Wavefunction(s) for diagram number 2
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 2
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -394,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 3
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -414,7 +418,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -434,7 +438,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -458,11 +462,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 1240 ***
 
       // Wavefunction(s) for diagram number 4
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] );
+      helas_VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] );
 
       // Amplitude(s) for diagram number 4
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -489,7 +493,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -516,7 +520,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -536,7 +540,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -556,7 +560,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -580,10 +584,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 1240 ***
 
       // Wavefunction(s) for diagram number 7
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] );
+      helas_VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 7
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -610,7 +614,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -637,7 +641,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -657,7 +661,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -677,7 +681,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -701,12 +705,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 1240 ***
 
       // Wavefunction(s) for diagram number 10
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
+      helas_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
+      helas_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] );
+      helas_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 10
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -726,7 +730,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -746,7 +750,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -770,12 +774,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 1240 ***
 
       // Wavefunction(s) for diagram number 11
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] );
+      helas_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] );
+      helas_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] );
+      helas_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 11
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -795,7 +799,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -815,7 +819,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -839,12 +843,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 1240 ***
 
       // Wavefunction(s) for diagram number 12
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
+      helas_VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 12
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -864,7 +868,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -884,7 +888,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -908,10 +912,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 1240 ***
 
       // Wavefunction(s) for diagram number 13
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 13
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -931,7 +935,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -951,7 +955,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -975,10 +979,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 1240 ***
 
       // Wavefunction(s) for diagram number 14
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 14
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1002,10 +1006,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 15 OF 1240 ***
 
       // Wavefunction(s) for diagram number 15
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] );
+      helas_VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] );
 
       // Amplitude(s) for diagram number 15
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1032,7 +1036,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1056,10 +1060,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 1240 ***
 
       // Wavefunction(s) for diagram number 17
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] );
 
       // Amplitude(s) for diagram number 17
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1079,7 +1083,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1099,7 +1103,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1126,7 +1130,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1150,10 +1154,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 1240 ***
 
       // Wavefunction(s) for diagram number 19
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] );
+      helas_VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] );
 
       // Amplitude(s) for diagram number 19
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1180,7 +1184,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1204,10 +1208,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 1240 ***
 
       // Wavefunction(s) for diagram number 21
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
 
       // Amplitude(s) for diagram number 21
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1227,7 +1231,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1247,7 +1251,7 @@ namespace mg5amcCpu
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1274,7 +1278,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1301,7 +1305,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1325,10 +1329,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 1240 ***
 
       // Wavefunction(s) for diagram number 24
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 24
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1352,12 +1356,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 25 OF 1240 ***
 
       // Wavefunction(s) for diagram number 25
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] );
+      helas_VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] );
+      helas_VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] );
+      helas_VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] );
 
       // Amplitude(s) for diagram number 25
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1377,7 +1381,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1397,7 +1401,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1421,12 +1425,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 1240 ***
 
       // Wavefunction(s) for diagram number 26
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] );
+      helas_FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] );
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1436,10 +1440,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 1240 ***
 
       // Wavefunction(s) for diagram number 27
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] );
+      helas_FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] );
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1449,10 +1453,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 1240 ***
 
       // Wavefunction(s) for diagram number 28
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] );
+      helas_FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] );
 
       // Amplitude(s) for diagram number 28
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1471,7 +1475,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1486,7 +1490,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1505,7 +1509,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 31
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1520,7 +1524,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1532,7 +1536,7 @@ namespace mg5amcCpu
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1544,7 +1548,7 @@ namespace mg5amcCpu
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1560,11 +1564,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 33 OF 1240 ***
 
       // Wavefunction(s) for diagram number 33
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
+      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] );
+      helas_FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1574,10 +1578,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 34 OF 1240 ***
 
       // Wavefunction(s) for diagram number 34
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
+      helas_FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
 
       // Amplitude(s) for diagram number 34
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1590,7 +1594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1602,10 +1606,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 36 OF 1240 ***
 
       // Wavefunction(s) for diagram number 36
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] );
 
       // Amplitude(s) for diagram number 36
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1615,10 +1619,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 37 OF 1240 ***
 
       // Wavefunction(s) for diagram number 37
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] );
+      helas_FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] );
 
       // Amplitude(s) for diagram number 37
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1631,7 +1635,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1646,7 +1650,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1661,7 +1665,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1676,7 +1680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 41
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1692,11 +1696,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 42 OF 1240 ***
 
       // Wavefunction(s) for diagram number 42
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] );
+      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
+      helas_FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] );
 
       // Amplitude(s) for diagram number 42
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1706,10 +1710,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 43 OF 1240 ***
 
       // Wavefunction(s) for diagram number 43
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] );
+      helas_FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] );
 
       // Amplitude(s) for diagram number 43
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1719,10 +1723,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 44 OF 1240 ***
 
       // Wavefunction(s) for diagram number 44
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] );
+      helas_FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] );
 
       // Amplitude(s) for diagram number 44
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1741,7 +1745,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1756,7 +1760,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1775,7 +1779,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 47
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1790,7 +1794,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1802,7 +1806,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1814,7 +1818,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1830,11 +1834,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 49 OF 1240 ***
 
       // Wavefunction(s) for diagram number 49
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] );
+      helas_FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
 
       // Amplitude(s) for diagram number 49
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1844,10 +1848,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 50 OF 1240 ***
 
       // Wavefunction(s) for diagram number 50
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
+      helas_FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
 
       // Amplitude(s) for diagram number 50
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1860,7 +1864,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 51
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1875,7 +1879,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 52
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1888,7 +1892,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1901,7 +1905,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 54
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1916,7 +1920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 55
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1931,7 +1935,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 56
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1946,7 +1950,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1962,11 +1966,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 58 OF 1240 ***
 
       // Wavefunction(s) for diagram number 58
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
+      helas_FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] );
 
       // Amplitude(s) for diagram number 58
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1976,10 +1980,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 1240 ***
 
       // Wavefunction(s) for diagram number 59
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] );
+      helas_FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] );
 
       // Amplitude(s) for diagram number 59
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1989,10 +1993,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 60 OF 1240 ***
 
       // Wavefunction(s) for diagram number 60
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] );
+      helas_FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] );
 
       // Amplitude(s) for diagram number 60
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2011,7 +2015,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2026,7 +2030,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2045,7 +2049,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 63
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2060,7 +2064,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2072,7 +2076,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2084,7 +2088,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2100,10 +2104,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 65 OF 1240 ***
 
       // Wavefunction(s) for diagram number 65
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
 
       // Amplitude(s) for diagram number 65
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2116,7 +2120,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 66
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2129,7 +2133,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2144,7 +2148,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 68
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2157,7 +2161,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2170,7 +2174,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2185,7 +2189,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2200,7 +2204,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2215,7 +2219,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 73
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2231,11 +2235,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 74 OF 1240 ***
 
       // Wavefunction(s) for diagram number 74
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 74
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2245,10 +2249,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 75 OF 1240 ***
 
       // Wavefunction(s) for diagram number 75
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] );
+      helas_FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] );
 
       // Amplitude(s) for diagram number 75
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2258,10 +2262,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 76 OF 1240 ***
 
       // Wavefunction(s) for diagram number 76
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] );
+      helas_FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] );
 
       // Amplitude(s) for diagram number 76
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2280,7 +2284,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 77
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2295,7 +2299,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 78
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2314,7 +2318,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 79
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2329,7 +2333,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 80
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2341,7 +2345,7 @@ namespace mg5amcCpu
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2353,7 +2357,7 @@ namespace mg5amcCpu
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2372,7 +2376,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 81
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2387,7 +2391,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 82
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2402,7 +2406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 83
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2418,10 +2422,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 84 OF 1240 ***
 
       // Wavefunction(s) for diagram number 84
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] );
+      helas_FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] );
 
       // Amplitude(s) for diagram number 84
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2431,10 +2435,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 85 OF 1240 ***
 
       // Wavefunction(s) for diagram number 85
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
+      helas_FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
 
       // Amplitude(s) for diagram number 85
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2444,10 +2448,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 86 OF 1240 ***
 
       // Wavefunction(s) for diagram number 86
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] );
+      helas_FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 86
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2466,7 +2470,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 87
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2481,7 +2485,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 88
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2500,7 +2504,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 89
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2515,7 +2519,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 90
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2527,7 +2531,7 @@ namespace mg5amcCpu
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2539,7 +2543,7 @@ namespace mg5amcCpu
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2558,7 +2562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 91
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2573,7 +2577,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 92
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2588,7 +2592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 93
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2604,10 +2608,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 94 OF 1240 ***
 
       // Wavefunction(s) for diagram number 94
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] );
+      helas_FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] );
 
       // Amplitude(s) for diagram number 94
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2617,10 +2621,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 95 OF 1240 ***
 
       // Wavefunction(s) for diagram number 95
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
+      helas_FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
 
       // Amplitude(s) for diagram number 95
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2630,10 +2634,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 96 OF 1240 ***
 
       // Wavefunction(s) for diagram number 96
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] );
+      helas_FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 96
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2652,7 +2656,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 97
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2667,7 +2671,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 98
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2686,7 +2690,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 99
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2701,7 +2705,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 100
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2713,7 +2717,7 @@ namespace mg5amcCpu
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2725,7 +2729,7 @@ namespace mg5amcCpu
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2744,7 +2748,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 101
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2759,7 +2763,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 102
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2774,7 +2778,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 103
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2790,10 +2794,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 104 OF 1240 ***
 
       // Wavefunction(s) for diagram number 104
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] );
+      helas_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] );
 
       // Amplitude(s) for diagram number 104
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2805,10 +2809,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 105 OF 1240 ***
 
       // Wavefunction(s) for diagram number 105
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] );
+      helas_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] );
 
       // Amplitude(s) for diagram number 105
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2824,10 +2828,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 106 OF 1240 ***
 
       // Wavefunction(s) for diagram number 106
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
 
       // Amplitude(s) for diagram number 106
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2842,7 +2846,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 107
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2861,7 +2865,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 108
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2880,7 +2884,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 109
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2896,10 +2900,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 110 OF 1240 ***
 
       // Wavefunction(s) for diagram number 110
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 110
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2911,10 +2915,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 111 OF 1240 ***
 
       // Wavefunction(s) for diagram number 111
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 111
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2930,10 +2934,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 112 OF 1240 ***
 
       // Wavefunction(s) for diagram number 112
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 112
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2948,7 +2952,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 113
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2967,7 +2971,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 114
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2986,7 +2990,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 115
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3002,10 +3006,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 116 OF 1240 ***
 
       // Wavefunction(s) for diagram number 116
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 116
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3017,10 +3021,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 117 OF 1240 ***
 
       // Wavefunction(s) for diagram number 117
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] );
+      helas_VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] );
 
       // Amplitude(s) for diagram number 117
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3036,10 +3040,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 118 OF 1240 ***
 
       // Wavefunction(s) for diagram number 118
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] );
+      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] );
 
       // Amplitude(s) for diagram number 118
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3054,7 +3058,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 119
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3073,7 +3077,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 120
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3092,7 +3096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 121
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3111,7 +3115,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 122
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3123,7 +3127,7 @@ namespace mg5amcCpu
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3135,7 +3139,7 @@ namespace mg5amcCpu
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3154,7 +3158,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 123
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3166,7 +3170,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3178,7 +3182,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3194,13 +3198,13 @@ namespace mg5amcCpu
       // *** DIAGRAM 124 OF 1240 ***
 
       // Wavefunction(s) for diagram number 124
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 124
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3209,10 +3213,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 125 OF 1240 ***
 
       // Wavefunction(s) for diagram number 125
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 125
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3221,11 +3225,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 126 OF 1240 ***
 
       // Wavefunction(s) for diagram number 126
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] );
+      helas_FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] );
+      helas_FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] );
 
       // Amplitude(s) for diagram number 126
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3237,7 +3241,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 127
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3246,10 +3250,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 128 OF 1240 ***
 
       // Wavefunction(s) for diagram number 128
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] );
+      helas_FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] );
 
       // Amplitude(s) for diagram number 128
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3261,7 +3265,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 129
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3270,10 +3274,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 130 OF 1240 ***
 
       // Wavefunction(s) for diagram number 130
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] );
+      helas_FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] );
 
       // Amplitude(s) for diagram number 130
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3285,10 +3289,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 131 OF 1240 ***
 
       // Wavefunction(s) for diagram number 131
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
 
       // Amplitude(s) for diagram number 131
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3301,7 +3305,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 132
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3314,7 +3318,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 133
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3326,10 +3330,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 134 OF 1240 ***
 
       // Wavefunction(s) for diagram number 134
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 134
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3342,7 +3346,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 135
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3355,7 +3359,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 136
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3370,7 +3374,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 137
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3380,10 +3384,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 138 OF 1240 ***
 
       // Wavefunction(s) for diagram number 138
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
+      helas_FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
 
       // Amplitude(s) for diagram number 138
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3396,7 +3400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 139
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3404,7 +3408,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3412,7 +3416,7 @@ namespace mg5amcCpu
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3424,12 +3428,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 140 OF 1240 ***
 
       // Wavefunction(s) for diagram number 140
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] );
+      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] );
+      helas_FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] );
+      helas_VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] );
 
       // Amplitude(s) for diagram number 140
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3445,10 +3449,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 141 OF 1240 ***
 
       // Wavefunction(s) for diagram number 141
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] );
+      helas_VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] );
 
       // Amplitude(s) for diagram number 141
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3467,7 +3471,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 142
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3479,7 +3483,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3491,7 +3495,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3507,10 +3511,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 143 OF 1240 ***
 
       // Wavefunction(s) for diagram number 143
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] );
+      helas_FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] );
 
       // Amplitude(s) for diagram number 143
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3523,7 +3527,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 144
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3538,7 +3542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 145
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3551,7 +3555,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 146
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3563,10 +3567,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 147 OF 1240 ***
 
       // Wavefunction(s) for diagram number 147
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
+      helas_FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
 
       // Amplitude(s) for diagram number 147
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3576,10 +3580,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 148 OF 1240 ***
 
       // Wavefunction(s) for diagram number 148
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] );
+      helas_FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] );
 
       // Amplitude(s) for diagram number 148
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3594,7 +3598,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 149
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3607,7 +3611,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 150
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3617,10 +3621,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 151 OF 1240 ***
 
       // Wavefunction(s) for diagram number 151
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] );
+      helas_FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 151
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3635,7 +3639,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 152
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3648,7 +3652,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 153
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3663,7 +3667,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 154
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3682,7 +3686,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 155
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3694,11 +3698,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 156 OF 1240 ***
 
       // Wavefunction(s) for diagram number 156
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] );
+      helas_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] );
+      helas_VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] );
 
       // Amplitude(s) for diagram number 156
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3714,10 +3718,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 157 OF 1240 ***
 
       // Wavefunction(s) for diagram number 157
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] );
+      helas_VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] );
 
       // Amplitude(s) for diagram number 157
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3736,7 +3740,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 158
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3748,7 +3752,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3760,7 +3764,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3776,10 +3780,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 159 OF 1240 ***
 
       // Wavefunction(s) for diagram number 159
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
 
       // Amplitude(s) for diagram number 159
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3792,7 +3796,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 160
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3807,7 +3811,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 161
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3820,7 +3824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 162
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3832,10 +3836,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 163 OF 1240 ***
 
       // Wavefunction(s) for diagram number 163
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
+      helas_FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
 
       // Amplitude(s) for diagram number 163
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3845,10 +3849,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 164 OF 1240 ***
 
       // Wavefunction(s) for diagram number 164
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] );
+      helas_FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] );
 
       // Amplitude(s) for diagram number 164
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3863,7 +3867,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 165
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3876,7 +3880,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 166
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3889,7 +3893,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 167
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3904,7 +3908,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 168
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3917,7 +3921,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 169
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3932,7 +3936,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 170
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3951,7 +3955,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 171
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3963,11 +3967,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 172 OF 1240 ***
 
       // Wavefunction(s) for diagram number 172
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] );
+      helas_VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] );
 
       // Amplitude(s) for diagram number 172
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3983,10 +3987,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 173 OF 1240 ***
 
       // Wavefunction(s) for diagram number 173
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] );
+      helas_VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] );
 
       // Amplitude(s) for diagram number 173
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4005,7 +4009,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 174
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4017,7 +4021,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4029,7 +4033,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4045,10 +4049,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 175 OF 1240 ***
 
       // Wavefunction(s) for diagram number 175
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] );
+      helas_FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] );
 
       // Amplitude(s) for diagram number 175
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4061,7 +4065,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 176
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4076,7 +4080,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 177
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4089,7 +4093,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 178
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4101,10 +4105,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 179 OF 1240 ***
 
       // Wavefunction(s) for diagram number 179
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
 
       // Amplitude(s) for diagram number 179
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4117,7 +4121,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 180
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4132,7 +4136,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 181
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4145,7 +4149,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 182
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4158,7 +4162,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 183
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4173,7 +4177,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 184
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4186,7 +4190,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 185
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4201,7 +4205,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 186
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4220,7 +4224,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 187
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4232,10 +4236,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 188 OF 1240 ***
 
       // Wavefunction(s) for diagram number 188
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
 
       // Amplitude(s) for diagram number 188
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4247,7 +4251,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 189
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4256,10 +4260,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 190 OF 1240 ***
 
       // Wavefunction(s) for diagram number 190
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] );
+      helas_FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] );
 
       // Amplitude(s) for diagram number 190
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4271,7 +4275,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 191
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4283,7 +4287,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 192
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4295,7 +4299,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 193
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4307,7 +4311,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 194
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4320,7 +4324,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 195
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4335,7 +4339,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 196
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4348,7 +4352,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 197
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4360,7 +4364,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 198
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4369,10 +4373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 199 OF 1240 ***
 
       // Wavefunction(s) for diagram number 199
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
+      helas_FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
 
       // Amplitude(s) for diagram number 199
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4384,7 +4388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 200
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4396,7 +4400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 201
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4408,7 +4412,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 202
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4420,7 +4424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 203
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4433,7 +4437,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 204
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4448,7 +4452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 205
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4461,7 +4465,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 206
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4473,7 +4477,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 207
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4482,10 +4486,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 208 OF 1240 ***
 
       // Wavefunction(s) for diagram number 208
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 208
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4497,7 +4501,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 209
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4509,7 +4513,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 210
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4521,7 +4525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 211
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4533,7 +4537,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 212
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4546,7 +4550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 213
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4561,7 +4565,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 214
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4574,7 +4578,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 215
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4587,7 +4591,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 216
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4599,10 +4603,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 217 OF 1240 ***
 
       // Wavefunction(s) for diagram number 217
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] );
 
       // Amplitude(s) for diagram number 217
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4621,7 +4625,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 218
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4640,7 +4644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 219
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4652,7 +4656,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4664,7 +4668,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4683,7 +4687,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 220
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4698,7 +4702,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 221
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4711,7 +4715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 222
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4724,7 +4728,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 223
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4736,10 +4740,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 224 OF 1240 ***
 
       // Wavefunction(s) for diagram number 224
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 224
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4758,7 +4762,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 225
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4777,7 +4781,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 226
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4789,7 +4793,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4801,7 +4805,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4820,7 +4824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 227
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4835,7 +4839,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 228
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4848,7 +4852,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 229
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4861,7 +4865,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 230
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4873,10 +4877,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 231 OF 1240 ***
 
       // Wavefunction(s) for diagram number 231
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] );
+      helas_VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] );
 
       // Amplitude(s) for diagram number 231
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4895,7 +4899,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 232
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4914,7 +4918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 233
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4926,7 +4930,7 @@ namespace mg5amcCpu
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4938,7 +4942,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4957,7 +4961,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 234
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4972,7 +4976,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 235
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4982,12 +4986,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 236 OF 1240 ***
 
       // Wavefunction(s) for diagram number 236
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] );
+      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] );
+      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] );
+      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] );
 
       // Amplitude(s) for diagram number 236
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4999,7 +5003,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5011,7 +5015,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5030,7 +5034,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 237
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5038,7 +5042,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5046,7 +5050,7 @@ namespace mg5amcCpu
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5061,7 +5065,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 238
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5069,7 +5073,7 @@ namespace mg5amcCpu
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5077,7 +5081,7 @@ namespace mg5amcCpu
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5089,12 +5093,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 239 OF 1240 ***
 
       // Wavefunction(s) for diagram number 239
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] );
+      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] );
+      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] );
+      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] );
 
       // Amplitude(s) for diagram number 239
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5106,7 +5110,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5118,7 +5122,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5137,7 +5141,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 240
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5145,7 +5149,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5153,7 +5157,7 @@ namespace mg5amcCpu
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5168,7 +5172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 241
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5176,7 +5180,7 @@ namespace mg5amcCpu
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5184,7 +5188,7 @@ namespace mg5amcCpu
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5196,12 +5200,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 242 OF 1240 ***
 
       // Wavefunction(s) for diagram number 242
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] );
+      helas_VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] );
+      helas_VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] );
+      helas_VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] );
 
       // Amplitude(s) for diagram number 242
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5213,7 +5217,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5225,7 +5229,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5244,7 +5248,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 243
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5252,7 +5256,7 @@ namespace mg5amcCpu
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5260,7 +5264,7 @@ namespace mg5amcCpu
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5275,7 +5279,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 244
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5283,7 +5287,7 @@ namespace mg5amcCpu
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5291,7 +5295,7 @@ namespace mg5amcCpu
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5306,7 +5310,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 245
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5314,7 +5318,7 @@ namespace mg5amcCpu
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5322,7 +5326,7 @@ namespace mg5amcCpu
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5337,7 +5341,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 246
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5349,7 +5353,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5361,7 +5365,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5377,13 +5381,13 @@ namespace mg5amcCpu
       // *** DIAGRAM 247 OF 1240 ***
 
       // Wavefunction(s) for diagram number 247
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 247
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5392,10 +5396,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 248 OF 1240 ***
 
       // Wavefunction(s) for diagram number 248
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] );
+      helas_FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] );
 
       // Amplitude(s) for diagram number 248
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5404,11 +5408,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 249 OF 1240 ***
 
       // Wavefunction(s) for diagram number 249
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] );
+      helas_FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
+      helas_FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] );
 
       // Amplitude(s) for diagram number 249
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5420,7 +5424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 250
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5429,10 +5433,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 251 OF 1240 ***
 
       // Wavefunction(s) for diagram number 251
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
+      helas_FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
 
       // Amplitude(s) for diagram number 251
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5444,7 +5448,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 252
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5453,10 +5457,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 253 OF 1240 ***
 
       // Wavefunction(s) for diagram number 253
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] );
+      helas_FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] );
 
       // Amplitude(s) for diagram number 253
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5468,10 +5472,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 254 OF 1240 ***
 
       // Wavefunction(s) for diagram number 254
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 254
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5484,7 +5488,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 255
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5497,7 +5501,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 256
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5509,10 +5513,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 257 OF 1240 ***
 
       // Wavefunction(s) for diagram number 257
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
+      helas_FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
 
       // Amplitude(s) for diagram number 257
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5525,7 +5529,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 258
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5538,7 +5542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 259
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5553,7 +5557,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 260
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5563,10 +5567,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 261 OF 1240 ***
 
       // Wavefunction(s) for diagram number 261
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
+      helas_FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
 
       // Amplitude(s) for diagram number 261
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5579,7 +5583,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 262
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5587,7 +5591,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= amp_sv[0];
       jamp_sv[41] -= amp_sv[0];
       jamp_sv[47] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5595,7 +5599,7 @@ namespace mg5amcCpu
       jamp_sv[39] += amp_sv[0];
       jamp_sv[41] -= amp_sv[0];
       jamp_sv[45] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5607,10 +5611,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 263 OF 1240 ***
 
       // Wavefunction(s) for diagram number 263
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] );
+      helas_FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] );
 
       // Amplitude(s) for diagram number 263
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5629,7 +5633,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 264
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5648,7 +5652,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 265
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5660,7 +5664,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5672,7 +5676,7 @@ namespace mg5amcCpu
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5688,10 +5692,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 266 OF 1240 ***
 
       // Wavefunction(s) for diagram number 266
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] );
+      helas_FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] );
 
       // Amplitude(s) for diagram number 266
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5704,7 +5708,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 267
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5719,7 +5723,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 268
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5732,7 +5736,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 269
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5744,10 +5748,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 270 OF 1240 ***
 
       // Wavefunction(s) for diagram number 270
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
+      helas_FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
 
       // Amplitude(s) for diagram number 270
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5757,10 +5761,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 271 OF 1240 ***
 
       // Wavefunction(s) for diagram number 271
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] );
+      helas_FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] );
 
       // Amplitude(s) for diagram number 271
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5775,7 +5779,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 272
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5788,7 +5792,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 273
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5798,10 +5802,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 274 OF 1240 ***
 
       // Wavefunction(s) for diagram number 274
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] );
+      helas_FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 274
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5816,7 +5820,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 275
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5829,7 +5833,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 276
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5844,7 +5848,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 277
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5863,7 +5867,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 278
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5878,7 +5882,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 279
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5897,7 +5901,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 280
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5916,7 +5920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 281
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5928,7 +5932,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5940,7 +5944,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5956,10 +5960,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 282 OF 1240 ***
 
       // Wavefunction(s) for diagram number 282
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
+      helas_FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
 
       // Amplitude(s) for diagram number 282
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5972,7 +5976,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 283
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5987,7 +5991,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 284
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6000,7 +6004,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 285
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6012,10 +6016,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 286 OF 1240 ***
 
       // Wavefunction(s) for diagram number 286
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
+      helas_FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
 
       // Amplitude(s) for diagram number 286
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6025,10 +6029,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 287 OF 1240 ***
 
       // Wavefunction(s) for diagram number 287
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] );
+      helas_FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] );
 
       // Amplitude(s) for diagram number 287
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6043,7 +6047,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 288
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6056,7 +6060,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 289
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6069,7 +6073,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 290
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6084,7 +6088,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 291
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6097,7 +6101,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 292
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6112,7 +6116,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 293
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6131,7 +6135,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 294
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6146,7 +6150,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 295
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6165,7 +6169,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 296
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6184,7 +6188,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 297
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6196,7 +6200,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6208,7 +6212,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6224,10 +6228,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 298 OF 1240 ***
 
       // Wavefunction(s) for diagram number 298
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
+      helas_FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
 
       // Amplitude(s) for diagram number 298
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6240,7 +6244,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 299
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6255,7 +6259,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 300
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6268,7 +6272,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 301
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6280,10 +6284,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 302 OF 1240 ***
 
       // Wavefunction(s) for diagram number 302
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 302
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6296,7 +6300,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 303
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6311,7 +6315,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 304
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6324,7 +6328,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 305
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6337,7 +6341,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 306
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6352,7 +6356,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 307
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6365,7 +6369,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 308
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6380,7 +6384,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 309
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6399,7 +6403,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 310
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6411,10 +6415,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 311 OF 1240 ***
 
       // Wavefunction(s) for diagram number 311
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 311
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6426,7 +6430,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 312
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6435,10 +6439,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 313 OF 1240 ***
 
       // Wavefunction(s) for diagram number 313
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] );
+      helas_FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] );
 
       // Amplitude(s) for diagram number 313
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6450,7 +6454,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 314
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6462,7 +6466,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 315
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6474,7 +6478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 316
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6486,7 +6490,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 317
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6499,7 +6503,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 318
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6514,7 +6518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 319
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6527,7 +6531,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 320
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6539,7 +6543,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 321
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6548,10 +6552,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 322 OF 1240 ***
 
       // Wavefunction(s) for diagram number 322
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
+      helas_FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
 
       // Amplitude(s) for diagram number 322
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6563,7 +6567,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 323
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6575,7 +6579,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 324
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6587,7 +6591,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 325
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6599,7 +6603,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 326
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6612,7 +6616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 327
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6627,7 +6631,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 328
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6640,7 +6644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 329
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6652,7 +6656,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 330
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6661,10 +6665,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 331 OF 1240 ***
 
       // Wavefunction(s) for diagram number 331
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
+      helas_FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
 
       // Amplitude(s) for diagram number 331
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6676,7 +6680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 332
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6688,7 +6692,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 333
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6700,7 +6704,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 334
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6712,7 +6716,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 335
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6725,7 +6729,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 336
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6740,7 +6744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 337
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6753,7 +6757,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 338
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6766,7 +6770,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 339
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6781,7 +6785,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 340
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6800,7 +6804,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 341
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6819,7 +6823,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 342
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6831,7 +6835,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6843,7 +6847,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6862,7 +6866,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 343
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6877,7 +6881,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 344
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6890,7 +6894,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 345
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6903,7 +6907,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 346
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6918,7 +6922,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 347
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6937,7 +6941,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 348
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6956,7 +6960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 349
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6968,7 +6972,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6980,7 +6984,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6999,7 +7003,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 350
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7014,7 +7018,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 351
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7027,7 +7031,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 352
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7040,7 +7044,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 353
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7055,7 +7059,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 354
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7074,7 +7078,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 355
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7093,7 +7097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 356
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7105,7 +7109,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7117,7 +7121,7 @@ namespace mg5amcCpu
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7136,7 +7140,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 357
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7151,7 +7155,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 358
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7164,7 +7168,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 359
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7176,7 +7180,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7188,7 +7192,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7207,7 +7211,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 360
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7215,7 +7219,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[87] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7223,7 +7227,7 @@ namespace mg5amcCpu
       jamp_sv[57] += amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[81] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7238,7 +7242,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 361
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7246,7 +7250,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7254,7 +7258,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7269,7 +7273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 362
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7281,7 +7285,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7293,7 +7297,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7312,7 +7316,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 363
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7320,7 +7324,7 @@ namespace mg5amcCpu
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7328,7 +7332,7 @@ namespace mg5amcCpu
       jamp_sv[59] += amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7343,7 +7347,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 364
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7351,7 +7355,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7359,7 +7363,7 @@ namespace mg5amcCpu
       jamp_sv[87] += amp_sv[0];
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7374,7 +7378,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 365
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7386,7 +7390,7 @@ namespace mg5amcCpu
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7398,7 +7402,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7417,7 +7421,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 366
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7425,7 +7429,7 @@ namespace mg5amcCpu
       jamp_sv[47] -= amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7433,7 +7437,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7448,7 +7452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 367
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7456,7 +7460,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= amp_sv[0];
       jamp_sv[65] -= amp_sv[0];
       jamp_sv[71] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7464,7 +7468,7 @@ namespace mg5amcCpu
       jamp_sv[63] += amp_sv[0];
       jamp_sv[65] -= amp_sv[0];
       jamp_sv[69] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7479,7 +7483,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 368
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7487,7 +7491,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7495,7 +7499,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7510,7 +7514,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 369
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7522,7 +7526,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7534,7 +7538,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7550,11 +7554,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 370 OF 1240 ***
 
       // Wavefunction(s) for diagram number 370
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 370
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7567,7 +7571,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 371
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7577,11 +7581,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 372 OF 1240 ***
 
       // Wavefunction(s) for diagram number 372
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] );
+      helas_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] );
 
       // Amplitude(s) for diagram number 372
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7600,7 +7604,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 373
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7612,10 +7616,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 374 OF 1240 ***
 
       // Wavefunction(s) for diagram number 374
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 374
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7634,7 +7638,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 375
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7646,12 +7650,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 376 OF 1240 ***
 
       // Wavefunction(s) for diagram number 376
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 376
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7663,7 +7667,7 @@ namespace mg5amcCpu
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7675,7 +7679,7 @@ namespace mg5amcCpu
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7691,10 +7695,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 377 OF 1240 ***
 
       // Wavefunction(s) for diagram number 377
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] );
+      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] );
 
       // Amplitude(s) for diagram number 377
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7704,10 +7708,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 378 OF 1240 ***
 
       // Wavefunction(s) for diagram number 378
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 378
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7720,7 +7724,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 379
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7735,7 +7739,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 380
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7745,10 +7749,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 381 OF 1240 ***
 
       // Wavefunction(s) for diagram number 381
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] );
+      helas_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] );
 
       // Amplitude(s) for diagram number 381
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7761,7 +7765,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 382
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7776,7 +7780,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 383
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7791,7 +7795,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 384
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7803,10 +7807,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 385 OF 1240 ***
 
       // Wavefunction(s) for diagram number 385
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] );
+      helas_VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] );
 
       // Amplitude(s) for diagram number 385
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7822,10 +7826,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 386 OF 1240 ***
 
       // Wavefunction(s) for diagram number 386
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 386
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7838,7 +7842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 387
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7848,10 +7852,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 388 OF 1240 ***
 
       // Wavefunction(s) for diagram number 388
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] );
+      helas_FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] );
 
       // Amplitude(s) for diagram number 388
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7870,7 +7874,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 389
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7885,7 +7889,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 390
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7904,7 +7908,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 391
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7919,7 +7923,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 392
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7931,7 +7935,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7943,7 +7947,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7959,10 +7963,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 393 OF 1240 ***
 
       // Wavefunction(s) for diagram number 393
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 393
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7972,10 +7976,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 394 OF 1240 ***
 
       // Wavefunction(s) for diagram number 394
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] );
+      helas_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] );
 
       // Amplitude(s) for diagram number 394
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7988,7 +7992,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 395
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8003,7 +8007,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 396
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8013,10 +8017,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 397 OF 1240 ***
 
       // Wavefunction(s) for diagram number 397
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 397
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8029,7 +8033,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 398
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8044,7 +8048,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 399
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8059,7 +8063,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 400
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8074,7 +8078,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 401
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8093,7 +8097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 402
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8108,7 +8112,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 403
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8127,7 +8131,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 404
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8142,7 +8146,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 405
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8161,7 +8165,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 406
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8180,7 +8184,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 407
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8199,7 +8203,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 408
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8219,7 +8223,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8239,7 +8243,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8263,10 +8267,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 409 OF 1240 ***
 
       // Wavefunction(s) for diagram number 409
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 409
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8290,10 +8294,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 410 OF 1240 ***
 
       // Wavefunction(s) for diagram number 410
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] );
+      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 410
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8320,7 +8324,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 411
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8347,7 +8351,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 412
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8366,7 +8370,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 413
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8381,7 +8385,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 414
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8396,7 +8400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 415
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8415,7 +8419,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 416
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8430,7 +8434,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 417
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8445,7 +8449,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 418
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8460,7 +8464,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 419
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8479,7 +8483,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 420
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8494,7 +8498,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 421
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8513,7 +8517,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 422
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8532,7 +8536,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 423
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8551,7 +8555,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 424
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8571,7 +8575,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8591,7 +8595,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8615,10 +8619,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 425 OF 1240 ***
 
       // Wavefunction(s) for diagram number 425
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 425
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8645,7 +8649,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 426
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8672,7 +8676,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 427
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8699,7 +8703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 428
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8718,7 +8722,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 429
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8733,7 +8737,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 430
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8748,7 +8752,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 431
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8767,7 +8771,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 432
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8782,7 +8786,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 433
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8794,10 +8798,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 434 OF 1240 ***
 
       // Wavefunction(s) for diagram number 434
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 434
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8824,7 +8828,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 435
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8851,7 +8855,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 436
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8871,7 +8875,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8891,7 +8895,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8915,10 +8919,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 437 OF 1240 ***
 
       // Wavefunction(s) for diagram number 437
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] );
+      helas_VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] );
 
       // Amplitude(s) for diagram number 437
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8945,7 +8949,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 438
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8972,7 +8976,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 439
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8992,7 +8996,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[115] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9012,7 +9016,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9039,7 +9043,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 440
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9066,7 +9070,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 441
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9093,7 +9097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 442
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9113,7 +9117,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9133,7 +9137,7 @@ namespace mg5amcCpu
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9157,12 +9161,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 443 OF 1240 ***
 
       // Wavefunction(s) for diagram number 443
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 443
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9182,7 +9186,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9202,7 +9206,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9226,12 +9230,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 444 OF 1240 ***
 
       // Wavefunction(s) for diagram number 444
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] );
 
       // Amplitude(s) for diagram number 444
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9251,7 +9255,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9271,7 +9275,7 @@ namespace mg5amcCpu
       jamp_sv[94] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9298,7 +9302,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 445
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9318,7 +9322,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9338,7 +9342,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9365,7 +9369,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 446
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9385,7 +9389,7 @@ namespace mg5amcCpu
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9405,7 +9409,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9432,7 +9436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 447
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9459,7 +9463,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 448
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9486,7 +9490,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 449
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9513,7 +9517,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 450
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9532,7 +9536,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 451
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9547,7 +9551,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 452
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9560,7 +9564,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 453
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9573,7 +9577,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 454
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9588,7 +9592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 455
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9607,7 +9611,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 456
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9619,7 +9623,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9631,7 +9635,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9650,7 +9654,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 457
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9665,7 +9669,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 458
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9678,7 +9682,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 459
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9691,7 +9695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 460
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9710,7 +9714,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 461
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9725,7 +9729,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 462
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9738,7 +9742,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 463
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9751,7 +9755,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 464
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9766,7 +9770,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 465
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9785,7 +9789,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 466
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9797,7 +9801,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9809,7 +9813,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9828,7 +9832,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 467
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9843,7 +9847,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 468
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9856,7 +9860,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 469
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9869,7 +9873,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 470
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9888,7 +9892,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 471
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9903,7 +9907,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 472
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9916,7 +9920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 473
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9929,7 +9933,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 474
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9944,7 +9948,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 475
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9963,7 +9967,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 476
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9975,7 +9979,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9987,7 +9991,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10006,7 +10010,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 477
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10025,7 +10029,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 478
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10040,7 +10044,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 479
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10053,7 +10057,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 480
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10066,7 +10070,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 481
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10081,7 +10085,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 482
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10100,7 +10104,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 483
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10112,7 +10116,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10124,7 +10128,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10143,7 +10147,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 484
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10162,7 +10166,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 485
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10181,7 +10185,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 486
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10200,7 +10204,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 487
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10215,7 +10219,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 488
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10234,7 +10238,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 489
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10249,7 +10253,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 490
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10261,7 +10265,7 @@ namespace mg5amcCpu
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10273,7 +10277,7 @@ namespace mg5amcCpu
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10292,7 +10296,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 491
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10304,7 +10308,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10316,7 +10320,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10335,7 +10339,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 492
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10355,7 +10359,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10375,7 +10379,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10399,11 +10403,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 493 OF 1240 ***
 
       // Wavefunction(s) for diagram number 493
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 493
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10416,7 +10420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 494
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10426,10 +10430,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 495 OF 1240 ***
 
       // Wavefunction(s) for diagram number 495
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] );
+      helas_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] );
 
       // Amplitude(s) for diagram number 495
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10448,7 +10452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 496
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10460,10 +10464,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 497 OF 1240 ***
 
       // Wavefunction(s) for diagram number 497
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 497
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10482,7 +10486,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 498
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10494,12 +10498,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 499 OF 1240 ***
 
       // Wavefunction(s) for diagram number 499
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] );
 
       // Amplitude(s) for diagram number 499
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10511,7 +10515,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10523,7 +10527,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10539,10 +10543,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 500 OF 1240 ***
 
       // Wavefunction(s) for diagram number 500
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
 
       // Amplitude(s) for diagram number 500
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10552,10 +10556,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 501 OF 1240 ***
 
       // Wavefunction(s) for diagram number 501
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 501
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10568,7 +10572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 502
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10583,7 +10587,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 503
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10593,10 +10597,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 504 OF 1240 ***
 
       // Wavefunction(s) for diagram number 504
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
+      helas_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
 
       // Amplitude(s) for diagram number 504
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10609,7 +10613,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 505
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10624,7 +10628,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 506
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10639,7 +10643,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 507
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10651,10 +10655,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 508 OF 1240 ***
 
       // Wavefunction(s) for diagram number 508
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] );
 
       // Amplitude(s) for diagram number 508
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10670,10 +10674,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 509 OF 1240 ***
 
       // Wavefunction(s) for diagram number 509
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] );
+      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] );
 
       // Amplitude(s) for diagram number 509
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10686,7 +10690,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 510
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10699,7 +10703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 511
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10718,7 +10722,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 512
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10733,7 +10737,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 513
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10752,7 +10756,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 514
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10767,7 +10771,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 515
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10779,7 +10783,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10791,7 +10795,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10807,10 +10811,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 516 OF 1240 ***
 
       // Wavefunction(s) for diagram number 516
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
+      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
 
       // Amplitude(s) for diagram number 516
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10820,10 +10824,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 517 OF 1240 ***
 
       // Wavefunction(s) for diagram number 517
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 517
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10836,7 +10840,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 518
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10851,7 +10855,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 519
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10861,10 +10865,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 520 OF 1240 ***
 
       // Wavefunction(s) for diagram number 520
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 520
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10877,7 +10881,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 521
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10892,7 +10896,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 522
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10907,7 +10911,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 523
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10922,7 +10926,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 524
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10941,7 +10945,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 525
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10956,7 +10960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 526
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10975,7 +10979,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 527
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10990,7 +10994,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 528
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11009,7 +11013,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 529
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11028,7 +11032,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 530
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11047,7 +11051,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 531
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11067,7 +11071,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11087,7 +11091,7 @@ namespace mg5amcCpu
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11111,10 +11115,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 532 OF 1240 ***
 
       // Wavefunction(s) for diagram number 532
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 532
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11138,10 +11142,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 533 OF 1240 ***
 
       // Wavefunction(s) for diagram number 533
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] );
+      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 533
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11168,7 +11172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 534
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11195,7 +11199,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 535
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11214,7 +11218,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 536
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11229,7 +11233,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 537
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11244,7 +11248,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 538
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11263,7 +11267,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 539
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11278,7 +11282,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 540
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11293,7 +11297,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 541
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11308,7 +11312,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 542
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11327,7 +11331,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 543
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11342,7 +11346,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 544
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11361,7 +11365,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 545
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11380,7 +11384,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 546
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11399,7 +11403,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 547
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11419,7 +11423,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11439,7 +11443,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11463,10 +11467,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 548 OF 1240 ***
 
       // Wavefunction(s) for diagram number 548
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 548
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11493,7 +11497,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 549
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11520,7 +11524,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 550
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11547,7 +11551,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 551
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11566,7 +11570,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 552
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11581,7 +11585,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 553
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11596,7 +11600,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 554
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11615,7 +11619,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 555
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11630,7 +11634,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 556
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11642,10 +11646,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 557 OF 1240 ***
 
       // Wavefunction(s) for diagram number 557
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 557
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11672,7 +11676,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 558
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11699,7 +11703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 559
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11719,7 +11723,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11739,7 +11743,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11766,7 +11770,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 560
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11793,7 +11797,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 561
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11820,7 +11824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 562
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11840,7 +11844,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11860,7 +11864,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11887,7 +11891,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 563
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11914,7 +11918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 564
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11941,7 +11945,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 565
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11961,7 +11965,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11981,7 +11985,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12005,12 +12009,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 566 OF 1240 ***
 
       // Wavefunction(s) for diagram number 566
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 566
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12030,7 +12034,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12050,7 +12054,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12074,12 +12078,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 567 OF 1240 ***
 
       // Wavefunction(s) for diagram number 567
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
 
       // Amplitude(s) for diagram number 567
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12099,7 +12103,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12119,7 +12123,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12146,7 +12150,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 568
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12166,7 +12170,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12186,7 +12190,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12213,7 +12217,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 569
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12233,7 +12237,7 @@ namespace mg5amcCpu
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12253,7 +12257,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12280,7 +12284,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 570
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12307,7 +12311,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 571
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12334,7 +12338,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 572
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12361,7 +12365,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 573
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12380,7 +12384,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 574
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12395,7 +12399,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 575
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12408,7 +12412,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 576
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12421,7 +12425,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 577
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12436,7 +12440,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 578
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12455,7 +12459,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 579
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12467,7 +12471,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12479,7 +12483,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12498,7 +12502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 580
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12513,7 +12517,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 581
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12526,7 +12530,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 582
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12539,7 +12543,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 583
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12558,7 +12562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 584
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12573,7 +12577,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 585
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12586,7 +12590,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 586
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12599,7 +12603,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 587
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12614,7 +12618,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 588
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12633,7 +12637,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 589
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12645,7 +12649,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12657,7 +12661,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12676,7 +12680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 590
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12691,7 +12695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 591
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12704,7 +12708,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 592
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12717,7 +12721,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 593
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12736,7 +12740,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 594
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12751,7 +12755,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 595
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12764,7 +12768,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 596
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12777,7 +12781,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 597
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12792,7 +12796,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 598
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12811,7 +12815,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 599
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12823,7 +12827,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12835,7 +12839,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12854,7 +12858,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 600
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12873,7 +12877,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 601
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12888,7 +12892,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 602
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12901,7 +12905,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 603
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12914,7 +12918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 604
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12929,7 +12933,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 605
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12948,7 +12952,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 606
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12960,7 +12964,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12972,7 +12976,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12991,7 +12995,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 607
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13010,7 +13014,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 608
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13029,7 +13033,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 609
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13048,7 +13052,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 610
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13063,7 +13067,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 611
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13082,7 +13086,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 612
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13097,7 +13101,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 613
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13109,7 +13113,7 @@ namespace mg5amcCpu
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13121,7 +13125,7 @@ namespace mg5amcCpu
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13140,7 +13144,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 614
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13152,7 +13156,7 @@ namespace mg5amcCpu
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13164,7 +13168,7 @@ namespace mg5amcCpu
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13183,7 +13187,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 615
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13203,7 +13207,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13223,7 +13227,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13247,11 +13251,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 616 OF 1240 ***
 
       // Wavefunction(s) for diagram number 616
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 616
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13264,7 +13268,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 617
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13274,10 +13278,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 618 OF 1240 ***
 
       // Wavefunction(s) for diagram number 618
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] );
+      helas_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] );
 
       // Amplitude(s) for diagram number 618
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13296,7 +13300,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 619
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13308,10 +13312,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 620 OF 1240 ***
 
       // Wavefunction(s) for diagram number 620
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 620
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13330,7 +13334,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 621
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13342,12 +13346,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 622 OF 1240 ***
 
       // Wavefunction(s) for diagram number 622
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] );
 
       // Amplitude(s) for diagram number 622
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13359,7 +13363,7 @@ namespace mg5amcCpu
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13371,7 +13375,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13387,10 +13391,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 623 OF 1240 ***
 
       // Wavefunction(s) for diagram number 623
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 623
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13400,10 +13404,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 624 OF 1240 ***
 
       // Wavefunction(s) for diagram number 624
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
+      helas_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
 
       // Amplitude(s) for diagram number 624
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13416,7 +13420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 625
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13431,7 +13435,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 626
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13441,10 +13445,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 627 OF 1240 ***
 
       // Wavefunction(s) for diagram number 627
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 627
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13457,7 +13461,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 628
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13472,7 +13476,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 629
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13487,7 +13491,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 630
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13499,10 +13503,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 631 OF 1240 ***
 
       // Wavefunction(s) for diagram number 631
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] );
+      helas_VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] );
 
       // Amplitude(s) for diagram number 631
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13518,10 +13522,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 632 OF 1240 ***
 
       // Wavefunction(s) for diagram number 632
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] );
+      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] );
 
       // Amplitude(s) for diagram number 632
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13534,7 +13538,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 633
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13547,7 +13551,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 634
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13566,7 +13570,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 635
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13581,7 +13585,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 636
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13600,7 +13604,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 637
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13615,7 +13619,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 638
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13627,7 +13631,7 @@ namespace mg5amcCpu
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13639,7 +13643,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13655,10 +13659,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 639 OF 1240 ***
 
       // Wavefunction(s) for diagram number 639
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 639
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13668,10 +13672,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 640 OF 1240 ***
 
       // Wavefunction(s) for diagram number 640
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 640
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13684,7 +13688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 641
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13699,7 +13703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 642
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13709,10 +13713,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 643 OF 1240 ***
 
       // Wavefunction(s) for diagram number 643
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 643
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13725,7 +13729,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 644
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13740,7 +13744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 645
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13755,7 +13759,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 646
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13770,7 +13774,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 647
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13789,7 +13793,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 648
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13804,7 +13808,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 649
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13823,7 +13827,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 650
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13838,7 +13842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 651
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13857,7 +13861,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 652
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13876,7 +13880,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 653
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13895,7 +13899,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 654
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13915,7 +13919,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13935,7 +13939,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13959,10 +13963,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 655 OF 1240 ***
 
       // Wavefunction(s) for diagram number 655
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 655
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13986,10 +13990,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 656 OF 1240 ***
 
       // Wavefunction(s) for diagram number 656
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] );
+      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] );
 
       // Amplitude(s) for diagram number 656
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14016,7 +14020,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 657
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14043,7 +14047,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 658
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14062,7 +14066,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 659
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14077,7 +14081,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 660
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14092,7 +14096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 661
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14111,7 +14115,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 662
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14126,7 +14130,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 663
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14141,7 +14145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 664
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14156,7 +14160,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 665
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14175,7 +14179,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 666
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14190,7 +14194,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 667
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14209,7 +14213,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 668
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14228,7 +14232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 669
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14247,7 +14251,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 670
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14267,7 +14271,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14287,7 +14291,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14311,10 +14315,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 671 OF 1240 ***
 
       // Wavefunction(s) for diagram number 671
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 671
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14341,7 +14345,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 672
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14368,7 +14372,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 673
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14395,7 +14399,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 674
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14414,7 +14418,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 675
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14429,7 +14433,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 676
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14444,7 +14448,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 677
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14463,7 +14467,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 678
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14478,7 +14482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 679
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14490,10 +14494,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 680 OF 1240 ***
 
       // Wavefunction(s) for diagram number 680
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 680
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14520,7 +14524,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 681
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14547,7 +14551,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 682
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14567,7 +14571,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14587,7 +14591,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14614,7 +14618,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 683
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14641,7 +14645,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 684
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14668,7 +14672,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 685
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14688,7 +14692,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14708,7 +14712,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14735,7 +14739,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 686
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14762,7 +14766,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 687
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14789,7 +14793,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 688
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14809,7 +14813,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14829,7 +14833,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14853,12 +14857,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 689 OF 1240 ***
 
       // Wavefunction(s) for diagram number 689
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 689
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14878,7 +14882,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14898,7 +14902,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14922,12 +14926,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 690 OF 1240 ***
 
       // Wavefunction(s) for diagram number 690
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 690
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14947,7 +14951,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14967,7 +14971,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14994,7 +14998,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 691
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15014,7 +15018,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15034,7 +15038,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15061,7 +15065,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 692
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15081,7 +15085,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15101,7 +15105,7 @@ namespace mg5amcCpu
       jamp_sv[97] += amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15128,7 +15132,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 693
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15155,7 +15159,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 694
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15182,7 +15186,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 695
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15209,7 +15213,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 696
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15228,7 +15232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 697
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15243,7 +15247,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 698
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15256,7 +15260,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 699
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15269,7 +15273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 700
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15284,7 +15288,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 701
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15303,7 +15307,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 702
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15315,7 +15319,7 @@ namespace mg5amcCpu
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15327,7 +15331,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15346,7 +15350,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 703
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15361,7 +15365,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 704
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15374,7 +15378,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 705
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15387,7 +15391,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 706
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15406,7 +15410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 707
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15421,7 +15425,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 708
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15434,7 +15438,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 709
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15447,7 +15451,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 710
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15462,7 +15466,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 711
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15481,7 +15485,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 712
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15493,7 +15497,7 @@ namespace mg5amcCpu
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15505,7 +15509,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15524,7 +15528,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 713
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15539,7 +15543,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 714
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15552,7 +15556,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 715
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15565,7 +15569,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 716
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15584,7 +15588,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 717
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15599,7 +15603,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 718
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15612,7 +15616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 719
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15625,7 +15629,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 720
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15640,7 +15644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 721
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15659,7 +15663,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 722
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15671,7 +15675,7 @@ namespace mg5amcCpu
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15683,7 +15687,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15702,7 +15706,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 723
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15721,7 +15725,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 724
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15736,7 +15740,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 725
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15749,7 +15753,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 726
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15762,7 +15766,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 727
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15777,7 +15781,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 728
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15796,7 +15800,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 729
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15808,7 +15812,7 @@ namespace mg5amcCpu
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15820,7 +15824,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15839,7 +15843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 730
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15858,7 +15862,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 731
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15877,7 +15881,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 732
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15896,7 +15900,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 733
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15911,7 +15915,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 734
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15930,7 +15934,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 735
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15945,7 +15949,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 736
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15957,7 +15961,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15969,7 +15973,7 @@ namespace mg5amcCpu
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15988,7 +15992,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 737
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16000,7 +16004,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16012,7 +16016,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16031,7 +16035,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 738
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16051,7 +16055,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16071,7 +16075,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16095,10 +16099,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 739 OF 1240 ***
 
       // Wavefunction(s) for diagram number 739
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] );
+      helas_FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] );
 
       // Amplitude(s) for diagram number 739
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16110,7 +16114,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 740
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16119,10 +16123,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 741 OF 1240 ***
 
       // Wavefunction(s) for diagram number 741
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 741
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16134,7 +16138,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 742
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16146,7 +16150,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 743
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16158,7 +16162,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 744
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16170,7 +16174,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 745
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16183,7 +16187,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 746
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16193,10 +16197,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 747 OF 1240 ***
 
       // Wavefunction(s) for diagram number 747
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] );
+      helas_VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 747
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16211,7 +16215,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 748
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16223,7 +16227,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 749
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16232,10 +16236,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 750 OF 1240 ***
 
       // Wavefunction(s) for diagram number 750
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 750
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16247,7 +16251,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 751
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16259,7 +16263,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 752
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16271,7 +16275,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 753
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16283,7 +16287,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 754
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16296,7 +16300,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 755
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16306,10 +16310,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 756 OF 1240 ***
 
       // Wavefunction(s) for diagram number 756
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] );
+      helas_VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 756
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16324,7 +16328,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 757
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16336,7 +16340,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 758
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16345,10 +16349,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 759 OF 1240 ***
 
       // Wavefunction(s) for diagram number 759
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
 
       // Amplitude(s) for diagram number 759
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16360,7 +16364,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 760
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16372,7 +16376,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 761
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16384,7 +16388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 762
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16396,7 +16400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 763
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16409,7 +16413,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 764
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16419,10 +16423,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 765 OF 1240 ***
 
       // Wavefunction(s) for diagram number 765
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] );
 
       // Amplitude(s) for diagram number 765
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16437,7 +16441,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 766
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16450,7 +16454,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 767
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16465,7 +16469,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 768
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16484,7 +16488,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 769
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16499,7 +16503,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 770
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16518,7 +16522,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 771
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16528,12 +16532,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 772 OF 1240 ***
 
       // Wavefunction(s) for diagram number 772
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 772
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16545,7 +16549,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16557,7 +16561,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16576,7 +16580,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 773
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16589,7 +16593,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 774
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16604,7 +16608,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 775
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16623,7 +16627,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 776
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16638,7 +16642,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 777
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16657,7 +16661,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 778
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16667,12 +16671,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 779 OF 1240 ***
 
       // Wavefunction(s) for diagram number 779
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
 
       // Amplitude(s) for diagram number 779
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16684,7 +16688,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16696,7 +16700,7 @@ namespace mg5amcCpu
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16715,7 +16719,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 780
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16728,7 +16732,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 781
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16743,7 +16747,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 782
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16762,7 +16766,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 783
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16777,7 +16781,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 784
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16796,7 +16800,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 785
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16806,12 +16810,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 786 OF 1240 ***
 
       // Wavefunction(s) for diagram number 786
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 786
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16823,7 +16827,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16835,7 +16839,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16854,7 +16858,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 787
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16862,7 +16866,7 @@ namespace mg5amcCpu
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16870,7 +16874,7 @@ namespace mg5amcCpu
       jamp_sv[26] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16882,12 +16886,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 788 OF 1240 ***
 
       // Wavefunction(s) for diagram number 788
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] );
+      helas_VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] );
+      helas_VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] );
 
       // Amplitude(s) for diagram number 788
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16899,7 +16903,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16911,7 +16915,7 @@ namespace mg5amcCpu
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16927,10 +16931,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 789 OF 1240 ***
 
       // Wavefunction(s) for diagram number 789
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 789
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16942,7 +16946,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 790
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16951,10 +16955,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 791 OF 1240 ***
 
       // Wavefunction(s) for diagram number 791
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 791
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16966,7 +16970,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 792
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16978,7 +16982,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 793
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16990,7 +16994,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 794
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17002,7 +17006,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 795
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17015,7 +17019,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 796
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17028,7 +17032,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 797
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17043,7 +17047,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 798
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17055,7 +17059,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 799
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17064,10 +17068,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 800 OF 1240 ***
 
       // Wavefunction(s) for diagram number 800
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 800
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17079,7 +17083,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 801
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17091,7 +17095,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 802
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17103,7 +17107,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 803
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17115,7 +17119,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 804
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17128,7 +17132,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 805
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17141,7 +17145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 806
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17156,7 +17160,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 807
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17168,7 +17172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 808
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17177,10 +17181,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 809 OF 1240 ***
 
       // Wavefunction(s) for diagram number 809
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
+      helas_FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
 
       // Amplitude(s) for diagram number 809
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17192,7 +17196,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 810
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17204,7 +17208,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 811
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17216,7 +17220,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 812
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17228,7 +17232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 813
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17241,7 +17245,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 814
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17254,7 +17258,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 815
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17269,7 +17273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 816
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17282,7 +17286,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 817
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17297,7 +17301,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 818
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17316,7 +17320,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 819
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17331,7 +17335,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 820
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17350,7 +17354,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 821
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17363,7 +17367,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 822
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17375,7 +17379,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17387,7 +17391,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17406,7 +17410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 823
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17419,7 +17423,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 824
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17434,7 +17438,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 825
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17453,7 +17457,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 826
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17468,7 +17472,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 827
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17487,7 +17491,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 828
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17500,7 +17504,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 829
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17512,7 +17516,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17524,7 +17528,7 @@ namespace mg5amcCpu
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17543,7 +17547,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 830
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17556,7 +17560,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 831
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17571,7 +17575,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 832
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17590,7 +17594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 833
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17605,7 +17609,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 834
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17624,7 +17628,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 835
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17637,7 +17641,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 836
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17649,7 +17653,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17661,7 +17665,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17680,7 +17684,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 837
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17688,7 +17692,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17696,7 +17700,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17711,7 +17715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 838
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17723,7 +17727,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17735,7 +17739,7 @@ namespace mg5amcCpu
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17751,10 +17755,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 839 OF 1240 ***
 
       // Wavefunction(s) for diagram number 839
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] );
+      helas_VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] );
 
       // Amplitude(s) for diagram number 839
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17781,7 +17785,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 840
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17808,7 +17812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 841
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17828,7 +17832,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17848,7 +17852,7 @@ namespace mg5amcCpu
       jamp_sv[115] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17872,10 +17876,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 842 OF 1240 ***
 
       // Wavefunction(s) for diagram number 842
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] );
+      helas_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] );
 
       // Amplitude(s) for diagram number 842
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17902,7 +17906,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 843
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17929,7 +17933,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 844
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17949,7 +17953,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17969,7 +17973,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17996,7 +18000,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 845
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18023,7 +18027,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 846
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18047,12 +18051,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 847 OF 1240 ***
 
       // Wavefunction(s) for diagram number 847
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 847
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18072,7 +18076,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18092,7 +18096,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18116,12 +18120,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 848 OF 1240 ***
 
       // Wavefunction(s) for diagram number 848
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 848
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18141,7 +18145,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18161,7 +18165,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[98] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18185,12 +18189,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 849 OF 1240 ***
 
       // Wavefunction(s) for diagram number 849
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] );
 
       // Amplitude(s) for diagram number 849
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18210,7 +18214,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18230,7 +18234,7 @@ namespace mg5amcCpu
       jamp_sv[105] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18254,12 +18258,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 850 OF 1240 ***
 
       // Wavefunction(s) for diagram number 850
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] );
 
       // Amplitude(s) for diagram number 850
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18279,7 +18283,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18299,7 +18303,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18326,7 +18330,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 851
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18346,7 +18350,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18366,7 +18370,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18393,7 +18397,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 852
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18420,7 +18424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 853
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18447,7 +18451,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 854
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18474,7 +18478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 855
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18493,7 +18497,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 856
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18508,7 +18512,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 857
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18521,7 +18525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 858
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18536,7 +18540,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 859
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18549,7 +18553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 860
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18568,7 +18572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 861
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18580,7 +18584,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18592,7 +18596,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18611,7 +18615,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 862
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18626,7 +18630,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 863
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18639,7 +18643,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 864
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18652,7 +18656,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 865
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18671,7 +18675,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 866
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18686,7 +18690,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 867
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18699,7 +18703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 868
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18714,7 +18718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 869
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18727,7 +18731,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 870
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18746,7 +18750,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 871
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18758,7 +18762,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18770,7 +18774,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18789,7 +18793,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 872
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18804,7 +18808,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 873
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18817,7 +18821,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 874
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18830,7 +18834,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 875
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18849,7 +18853,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 876
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18864,7 +18868,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 877
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18877,7 +18881,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 878
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18892,7 +18896,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 879
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18905,7 +18909,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 880
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18924,7 +18928,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 881
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18936,7 +18940,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18948,7 +18952,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18967,7 +18971,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 882
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18986,7 +18990,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 883
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19001,7 +19005,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 884
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19014,7 +19018,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 885
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19029,7 +19033,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 886
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19042,7 +19046,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 887
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19061,7 +19065,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 888
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19073,7 +19077,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19085,7 +19089,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19104,7 +19108,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 889
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19123,7 +19127,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 890
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19142,7 +19146,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 891
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19161,7 +19165,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 892
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19180,7 +19184,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 893
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19195,7 +19199,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 894
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19207,10 +19211,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 895 OF 1240 ***
 
       // Wavefunction(s) for diagram number 895
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] );
+      helas_VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] );
 
       // Amplitude(s) for diagram number 895
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19237,7 +19241,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 896
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19264,7 +19268,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 897
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19284,7 +19288,7 @@ namespace mg5amcCpu
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19304,7 +19308,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19331,7 +19335,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 898
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19358,7 +19362,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 899
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19385,7 +19389,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 900
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19405,7 +19409,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19425,7 +19429,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[107] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19452,7 +19456,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 901
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19479,7 +19483,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 902
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19503,12 +19507,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 903 OF 1240 ***
 
       // Wavefunction(s) for diagram number 903
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 903
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19528,7 +19532,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19548,7 +19552,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19572,12 +19576,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 904 OF 1240 ***
 
       // Wavefunction(s) for diagram number 904
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] );
 
       // Amplitude(s) for diagram number 904
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19597,7 +19601,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19617,7 +19621,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19641,12 +19645,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 905 OF 1240 ***
 
       // Wavefunction(s) for diagram number 905
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
 
       // Amplitude(s) for diagram number 905
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19666,7 +19670,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19686,7 +19690,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19713,7 +19717,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 906
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19733,7 +19737,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19753,7 +19757,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19780,7 +19784,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 907
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19800,7 +19804,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19820,7 +19824,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19847,7 +19851,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 908
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19874,7 +19878,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 909
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19901,7 +19905,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 910
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19928,7 +19932,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 911
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19947,7 +19951,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 912
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19962,7 +19966,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 913
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19975,7 +19979,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 914
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19990,7 +19994,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 915
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20003,7 +20007,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 916
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20022,7 +20026,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 917
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20034,7 +20038,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20046,7 +20050,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20065,7 +20069,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 918
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20080,7 +20084,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 919
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20093,7 +20097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 920
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20106,7 +20110,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 921
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20125,7 +20129,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 922
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20140,7 +20144,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 923
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20153,7 +20157,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 924
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20168,7 +20172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 925
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20181,7 +20185,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 926
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20200,7 +20204,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 927
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20212,7 +20216,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20224,7 +20228,7 @@ namespace mg5amcCpu
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20243,7 +20247,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 928
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20258,7 +20262,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 929
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20271,7 +20275,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 930
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20284,7 +20288,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 931
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20303,7 +20307,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 932
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20318,7 +20322,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 933
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20331,7 +20335,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 934
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20346,7 +20350,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 935
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20359,7 +20363,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 936
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20378,7 +20382,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 937
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20390,7 +20394,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20402,7 +20406,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20421,7 +20425,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 938
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20440,7 +20444,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 939
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20455,7 +20459,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 940
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20468,7 +20472,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 941
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20483,7 +20487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 942
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20496,7 +20500,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 943
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20515,7 +20519,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 944
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20527,7 +20531,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20539,7 +20543,7 @@ namespace mg5amcCpu
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20558,7 +20562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 945
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20577,7 +20581,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 946
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20596,7 +20600,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 947
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20615,7 +20619,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 948
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20634,7 +20638,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 949
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20649,7 +20653,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 950
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20661,10 +20665,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 951 OF 1240 ***
 
       // Wavefunction(s) for diagram number 951
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 951
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20691,7 +20695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 952
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20718,7 +20722,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 953
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20738,7 +20742,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20758,7 +20762,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20785,7 +20789,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 954
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20812,7 +20816,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 955
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20839,7 +20843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 956
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20859,7 +20863,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20879,7 +20883,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20906,7 +20910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 957
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20933,7 +20937,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 958
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20957,12 +20961,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 959 OF 1240 ***
 
       // Wavefunction(s) for diagram number 959
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 959
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20982,7 +20986,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21002,7 +21006,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21026,12 +21030,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 960 OF 1240 ***
 
       // Wavefunction(s) for diagram number 960
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] );
 
       // Amplitude(s) for diagram number 960
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21051,7 +21055,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21071,7 +21075,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21098,7 +21102,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 961
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21118,7 +21122,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21138,7 +21142,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21165,7 +21169,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 962
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21185,7 +21189,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21205,7 +21209,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21232,7 +21236,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 963
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21252,7 +21256,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21272,7 +21276,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21299,7 +21303,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 964
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21326,7 +21330,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 965
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21353,7 +21357,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 966
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21380,7 +21384,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 967
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21399,7 +21403,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 968
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21414,7 +21418,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 969
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21427,7 +21431,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 970
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21442,7 +21446,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 971
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21455,7 +21459,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 972
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21474,7 +21478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 973
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21486,7 +21490,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21498,7 +21502,7 @@ namespace mg5amcCpu
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21517,7 +21521,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 974
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21532,7 +21536,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 975
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21545,7 +21549,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 976
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21558,7 +21562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 977
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21577,7 +21581,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 978
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21592,7 +21596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 979
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21605,7 +21609,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 980
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21620,7 +21624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 981
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21633,7 +21637,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 982
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21652,7 +21656,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 983
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21664,7 +21668,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21676,7 +21680,7 @@ namespace mg5amcCpu
       jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21695,7 +21699,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 984
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21710,7 +21714,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 985
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21723,7 +21727,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 986
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21736,7 +21740,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 987
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21755,7 +21759,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 988
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21770,7 +21774,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 989
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21783,7 +21787,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 990
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21798,7 +21802,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 991
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21811,7 +21815,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 992
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21830,7 +21834,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 993
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21842,7 +21846,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21854,7 +21858,7 @@ namespace mg5amcCpu
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21873,7 +21877,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 994
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21892,7 +21896,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 995
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21907,7 +21911,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 996
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21920,7 +21924,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 997
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21935,7 +21939,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 998
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21948,7 +21952,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 999
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21967,7 +21971,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1000
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21979,7 +21983,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21991,7 +21995,7 @@ namespace mg5amcCpu
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22010,7 +22014,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1001
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22029,7 +22033,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1002
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22048,7 +22052,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1003
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22067,7 +22071,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1004
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22086,7 +22090,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1005
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22101,7 +22105,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1006
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22116,7 +22120,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1007
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22143,7 +22147,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1008
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22170,7 +22174,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1009
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22190,7 +22194,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22210,7 +22214,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22237,7 +22241,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1010
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22264,7 +22268,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1011
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22291,7 +22295,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1012
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22311,7 +22315,7 @@ namespace mg5amcCpu
       jamp_sv[101] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22331,7 +22335,7 @@ namespace mg5amcCpu
       jamp_sv[103] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22358,7 +22362,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1013
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22385,7 +22389,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1014
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22409,12 +22413,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1015 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1015
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] );
 
       // Amplitude(s) for diagram number 1015
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22434,7 +22438,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22454,7 +22458,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22478,12 +22482,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1016 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1016
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 1016
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22503,7 +22507,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22523,7 +22527,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22550,7 +22554,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1017
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22570,7 +22574,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22590,7 +22594,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22617,7 +22621,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1018
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22637,7 +22641,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22657,7 +22661,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22684,7 +22688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1019
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22711,7 +22715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1020
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22738,7 +22742,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1021
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22758,7 +22762,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22778,7 +22782,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22805,7 +22809,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1022
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22832,7 +22836,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1023
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22859,7 +22863,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1024
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22879,7 +22883,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22899,7 +22903,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22926,7 +22930,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1025
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22953,7 +22957,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1026
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22980,7 +22984,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1027
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23000,7 +23004,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23020,7 +23024,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23044,12 +23048,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1028 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1028
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1028
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23069,7 +23073,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23089,7 +23093,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23116,7 +23120,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1029
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23136,7 +23140,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23156,7 +23160,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23183,7 +23187,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1030
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23203,7 +23207,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23223,7 +23227,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23250,7 +23254,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1031
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23277,7 +23281,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1032
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23304,7 +23308,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1033
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23324,7 +23328,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23344,7 +23348,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23371,7 +23375,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1034
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23398,7 +23402,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1035
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23425,7 +23429,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1036
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23445,7 +23449,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23465,7 +23469,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23492,7 +23496,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1037
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23519,7 +23523,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1038
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23546,7 +23550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1039
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23566,7 +23570,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23586,7 +23590,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23610,12 +23614,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1040 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1040
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 1040
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23635,7 +23639,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23655,7 +23659,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[90] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23682,7 +23686,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1041
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23702,7 +23706,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23722,7 +23726,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23749,7 +23753,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1042
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23769,7 +23773,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23789,7 +23793,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23816,7 +23820,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1043
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23836,7 +23840,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23856,7 +23860,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23876,7 +23880,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23896,7 +23900,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23916,7 +23920,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23936,7 +23940,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23956,7 +23960,7 @@ namespace mg5amcCpu
       jamp_sv[113] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23976,7 +23980,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24003,7 +24007,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1044
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24023,7 +24027,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24043,7 +24047,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24070,7 +24074,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1045
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24090,7 +24094,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24110,7 +24114,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24137,7 +24141,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1046
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24149,7 +24153,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1047
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24161,7 +24165,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1048
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24173,7 +24177,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1049
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24185,7 +24189,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1050
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24197,7 +24201,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1051
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24209,7 +24213,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1052
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24221,7 +24225,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1053
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24233,7 +24237,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1054
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24245,7 +24249,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1055
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24257,7 +24261,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1056
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24269,7 +24273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1057
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24281,7 +24285,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1058
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24296,7 +24300,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1059
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24309,7 +24313,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1060
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24324,7 +24328,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1061
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24343,7 +24347,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1062
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24356,7 +24360,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1063
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24375,7 +24379,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1064
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24387,7 +24391,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24399,7 +24403,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24418,7 +24422,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1065
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24430,7 +24434,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1066
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24442,7 +24446,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1067
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24454,7 +24458,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1068
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24466,7 +24470,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1069
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24478,7 +24482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1070
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24490,7 +24494,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1071
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24502,7 +24506,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1072
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24514,7 +24518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1073
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24526,7 +24530,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1074
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24538,7 +24542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1075
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24550,7 +24554,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1076
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24562,7 +24566,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1077
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24577,7 +24581,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1078
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24590,7 +24594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1079
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24605,7 +24609,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1080
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24624,7 +24628,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1081
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24637,7 +24641,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1082
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24656,7 +24660,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1083
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24668,7 +24672,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24680,7 +24684,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24699,7 +24703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1084
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24711,7 +24715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1085
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24723,7 +24727,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1086
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24735,7 +24739,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1087
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24747,7 +24751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1088
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24759,7 +24763,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1089
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24771,7 +24775,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1090
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24783,7 +24787,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1091
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24795,7 +24799,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1092
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24807,7 +24811,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1093
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24819,7 +24823,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1094
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24831,7 +24835,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1095
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24843,7 +24847,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1096
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24858,7 +24862,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1097
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24871,7 +24875,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1098
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24886,7 +24890,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1099
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24905,7 +24909,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1100
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24918,7 +24922,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1101
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24937,7 +24941,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1102
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24949,7 +24953,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24961,7 +24965,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24980,7 +24984,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1103
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24995,7 +24999,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1104
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25008,7 +25012,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1105
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25023,7 +25027,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1106
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25042,7 +25046,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1107
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25055,7 +25059,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1108
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25074,7 +25078,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1109
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25086,7 +25090,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25098,7 +25102,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25117,7 +25121,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1110
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25132,7 +25136,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1111
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25145,7 +25149,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1112
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25160,7 +25164,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1113
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25179,7 +25183,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1114
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25192,7 +25196,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1115
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25211,7 +25215,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1116
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25223,7 +25227,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25235,7 +25239,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25254,7 +25258,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1117
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25269,7 +25273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1118
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25282,7 +25286,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1119
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25297,7 +25301,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1120
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25316,7 +25320,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1121
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25329,7 +25333,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1122
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25348,7 +25352,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1123
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25360,7 +25364,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25372,7 +25376,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25388,12 +25392,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1124 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1124
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] );
 
       // Amplitude(s) for diagram number 1124
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25413,7 +25417,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25433,7 +25437,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25453,7 +25457,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25473,7 +25477,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25493,7 +25497,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25513,7 +25517,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25533,7 +25537,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25553,7 +25557,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25577,12 +25581,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1125 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1125
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] );
 
       // Amplitude(s) for diagram number 1125
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25602,7 +25606,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25622,7 +25626,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25646,12 +25650,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1126 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1126
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] );
+      helas_VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] );
+      helas_VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1126
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25671,7 +25675,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25691,7 +25695,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25718,7 +25722,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1127
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25738,7 +25742,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25758,7 +25762,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25782,12 +25786,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1128 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1128
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
 
       // Amplitude(s) for diagram number 1128
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25795,7 +25799,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25803,7 +25807,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25818,7 +25822,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1129
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25830,7 +25834,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25842,7 +25846,7 @@ namespace mg5amcCpu
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25861,7 +25865,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1130
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25869,7 +25873,7 @@ namespace mg5amcCpu
       jamp_sv[74] -= amp_sv[0];
       jamp_sv[80] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25877,7 +25881,7 @@ namespace mg5amcCpu
       jamp_sv[78] += amp_sv[0];
       jamp_sv[80] -= amp_sv[0];
       jamp_sv[84] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25892,7 +25896,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1131
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25900,7 +25904,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25908,7 +25912,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25923,7 +25927,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1132
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25935,7 +25939,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25947,7 +25951,7 @@ namespace mg5amcCpu
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25966,7 +25970,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1133
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25974,7 +25978,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25982,7 +25986,7 @@ namespace mg5amcCpu
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25994,12 +25998,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1134 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1134
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
 
       // Amplitude(s) for diagram number 1134
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26007,7 +26011,7 @@ namespace mg5amcCpu
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[55] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26015,7 +26019,7 @@ namespace mg5amcCpu
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[49] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26030,7 +26034,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1135
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26042,7 +26046,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26054,7 +26058,7 @@ namespace mg5amcCpu
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26073,7 +26077,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1136
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26081,7 +26085,7 @@ namespace mg5amcCpu
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
       jamp_sv[54] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26089,7 +26093,7 @@ namespace mg5amcCpu
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
       jamp_sv[48] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26104,7 +26108,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1137
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26116,7 +26120,7 @@ namespace mg5amcCpu
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26128,7 +26132,7 @@ namespace mg5amcCpu
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26147,7 +26151,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1138
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26159,7 +26163,7 @@ namespace mg5amcCpu
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26171,7 +26175,7 @@ namespace mg5amcCpu
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26190,7 +26194,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1139
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26202,7 +26206,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26214,7 +26218,7 @@ namespace mg5amcCpu
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26230,12 +26234,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1140 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1140
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 1140
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26255,7 +26259,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26275,7 +26279,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26295,7 +26299,7 @@ namespace mg5amcCpu
       jamp_sv[100] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26315,7 +26319,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26335,7 +26339,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26355,7 +26359,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26375,7 +26379,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26395,7 +26399,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26419,12 +26423,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1141 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1141
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 1141
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26444,7 +26448,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26464,7 +26468,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26488,12 +26492,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1142 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1142
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 1142
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26513,7 +26517,7 @@ namespace mg5amcCpu
       jamp_sv[100] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26533,7 +26537,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26560,7 +26564,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1143
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26580,7 +26584,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26600,7 +26604,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26624,12 +26628,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1144 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1144
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] );
+      helas_FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 1144
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26637,7 +26641,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[71] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26645,7 +26649,7 @@ namespace mg5amcCpu
       jamp_sv[68] += amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[70] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26660,7 +26664,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1145
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26672,7 +26676,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26684,7 +26688,7 @@ namespace mg5amcCpu
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26703,7 +26707,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1146
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26711,7 +26715,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= amp_sv[0];
       jamp_sv[56] -= amp_sv[0];
       jamp_sv[62] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26719,7 +26723,7 @@ namespace mg5amcCpu
       jamp_sv[54] += amp_sv[0];
       jamp_sv[56] -= amp_sv[0];
       jamp_sv[60] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26734,7 +26738,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1147
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26742,7 +26746,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26750,7 +26754,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26765,7 +26769,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1148
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26777,7 +26781,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26789,7 +26793,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26808,7 +26812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1149
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26816,7 +26820,7 @@ namespace mg5amcCpu
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26824,7 +26828,7 @@ namespace mg5amcCpu
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26836,12 +26840,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1150 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1150
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
+      helas_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
 
       // Amplitude(s) for diagram number 1150
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26849,7 +26853,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[79] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26857,7 +26861,7 @@ namespace mg5amcCpu
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[73] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26872,7 +26876,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1151
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26884,7 +26888,7 @@ namespace mg5amcCpu
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26896,7 +26900,7 @@ namespace mg5amcCpu
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26915,7 +26919,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1152
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26923,7 +26927,7 @@ namespace mg5amcCpu
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
       jamp_sv[78] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26931,7 +26935,7 @@ namespace mg5amcCpu
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
       jamp_sv[72] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26946,7 +26950,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1153
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26958,7 +26962,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26970,7 +26974,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26989,7 +26993,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1154
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27001,7 +27005,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27013,7 +27017,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27032,7 +27036,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1155
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27044,7 +27048,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27056,7 +27060,7 @@ namespace mg5amcCpu
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27072,12 +27076,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1156 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1156
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1156
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27097,7 +27101,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27117,7 +27121,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27137,7 +27141,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27157,7 +27161,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27177,7 +27181,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27197,7 +27201,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27217,7 +27221,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27237,7 +27241,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27261,12 +27265,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1157 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1157
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 1157
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27286,7 +27290,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27306,7 +27310,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27330,12 +27334,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1158 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1158
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] );
+      helas_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 1158
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27355,7 +27359,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27375,7 +27379,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27402,7 +27406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1159
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27422,7 +27426,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27442,7 +27446,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27466,12 +27470,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1160 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1160
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 1160
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27479,7 +27483,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[65] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27487,7 +27491,7 @@ namespace mg5amcCpu
       jamp_sv[62] += amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[64] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27502,7 +27506,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1161
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27514,7 +27518,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27526,7 +27530,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27545,7 +27549,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1162
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27553,7 +27557,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= amp_sv[0];
       jamp_sv[58] -= amp_sv[0];
       jamp_sv[68] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27561,7 +27565,7 @@ namespace mg5amcCpu
       jamp_sv[55] += amp_sv[0];
       jamp_sv[58] -= amp_sv[0];
       jamp_sv[66] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27576,7 +27580,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1163
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27584,7 +27588,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= amp_sv[0];
       jamp_sv[87] -= amp_sv[0];
       jamp_sv[89] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27592,7 +27596,7 @@ namespace mg5amcCpu
       jamp_sv[86] += amp_sv[0];
       jamp_sv[87] -= amp_sv[0];
       jamp_sv[88] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27607,7 +27611,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1164
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27619,7 +27623,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27631,7 +27635,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27650,7 +27654,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1165
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27658,7 +27662,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27666,7 +27670,7 @@ namespace mg5amcCpu
       jamp_sv[79] += amp_sv[0];
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[90] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27678,12 +27682,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1166 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1166
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
 
       // Amplitude(s) for diagram number 1166
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27691,7 +27695,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27699,7 +27703,7 @@ namespace mg5amcCpu
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27714,7 +27718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1167
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27726,7 +27730,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27738,7 +27742,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27757,7 +27761,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1168
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27765,7 +27769,7 @@ namespace mg5amcCpu
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27773,7 +27777,7 @@ namespace mg5amcCpu
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27788,7 +27792,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1169
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27800,7 +27804,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27812,7 +27816,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27831,7 +27835,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1170
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27843,7 +27847,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27855,7 +27859,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27874,7 +27878,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1171
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27886,7 +27890,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27898,7 +27902,7 @@ namespace mg5amcCpu
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27914,15 +27918,15 @@ namespace mg5amcCpu
       // *** DIAGRAM 1172 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1172
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] );
+      helas_FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 1172
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27930,7 +27934,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[47] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27938,7 +27942,7 @@ namespace mg5amcCpu
       jamp_sv[44] += amp_sv[0];
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[46] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27950,12 +27954,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1173 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1173
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
 
       // Amplitude(s) for diagram number 1173
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27967,7 +27971,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27979,7 +27983,7 @@ namespace mg5amcCpu
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27998,7 +28002,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1174
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28006,7 +28010,7 @@ namespace mg5amcCpu
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[32] -= amp_sv[0];
       jamp_sv[38] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28014,7 +28018,7 @@ namespace mg5amcCpu
       jamp_sv[30] += amp_sv[0];
       jamp_sv[32] -= amp_sv[0];
       jamp_sv[36] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28026,12 +28030,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1175 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1175
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 1175
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28039,7 +28043,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[85] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28047,7 +28051,7 @@ namespace mg5amcCpu
       jamp_sv[51] += amp_sv[0];
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[75] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28062,7 +28066,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1176
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28074,7 +28078,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28086,7 +28090,7 @@ namespace mg5amcCpu
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28105,7 +28109,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1177
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28113,7 +28117,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28121,7 +28125,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28136,7 +28140,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1178
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28148,7 +28152,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28160,7 +28164,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28179,7 +28183,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1179
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28191,7 +28195,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28203,7 +28207,7 @@ namespace mg5amcCpu
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28222,7 +28226,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1180
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28242,7 +28246,7 @@ namespace mg5amcCpu
       jamp_sv[103] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28262,7 +28266,7 @@ namespace mg5amcCpu
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28289,7 +28293,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1181
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28309,7 +28313,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28329,7 +28333,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28349,7 +28353,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28369,7 +28373,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28389,7 +28393,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28409,7 +28413,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28429,7 +28433,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28449,7 +28453,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28473,12 +28477,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1182 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1182
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 1182
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28498,7 +28502,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28518,7 +28522,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28545,7 +28549,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1183
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28565,7 +28569,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28585,7 +28589,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28612,7 +28616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1184
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28624,7 +28628,7 @@ namespace mg5amcCpu
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28636,7 +28640,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28655,7 +28659,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1185
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28663,7 +28667,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28671,7 +28675,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28686,7 +28690,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1186
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28698,7 +28702,7 @@ namespace mg5amcCpu
       jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28710,7 +28714,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28729,7 +28733,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1187
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28737,7 +28741,7 @@ namespace mg5amcCpu
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[60] -= amp_sv[0];
       jamp_sv[84] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28745,7 +28749,7 @@ namespace mg5amcCpu
       jamp_sv[50] += amp_sv[0];
       jamp_sv[60] -= amp_sv[0];
       jamp_sv[74] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28757,15 +28761,15 @@ namespace mg5amcCpu
       // *** DIAGRAM 1188 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1188
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] );
+      helas_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
+      helas_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
 
       // Amplitude(s) for diagram number 1188
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28773,7 +28777,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[41] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28781,7 +28785,7 @@ namespace mg5amcCpu
       jamp_sv[38] += amp_sv[0];
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[40] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28793,12 +28797,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1189 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1189
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 1189
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28810,7 +28814,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28822,7 +28826,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28841,7 +28845,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1190
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28849,7 +28853,7 @@ namespace mg5amcCpu
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[34] -= amp_sv[0];
       jamp_sv[44] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28857,7 +28861,7 @@ namespace mg5amcCpu
       jamp_sv[31] += amp_sv[0];
       jamp_sv[34] -= amp_sv[0];
       jamp_sv[42] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28869,12 +28873,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1191 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1191
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
+      helas_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 1191
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28882,7 +28886,7 @@ namespace mg5amcCpu
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28890,7 +28894,7 @@ namespace mg5amcCpu
       jamp_sv[53] += amp_sv[0];
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28905,7 +28909,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1192
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28917,7 +28921,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28929,7 +28933,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28948,7 +28952,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1193
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28956,7 +28960,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28964,7 +28968,7 @@ namespace mg5amcCpu
       jamp_sv[85] += amp_sv[0];
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28979,7 +28983,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1194
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28991,7 +28995,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29003,7 +29007,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29022,7 +29026,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1195
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29034,7 +29038,7 @@ namespace mg5amcCpu
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29046,7 +29050,7 @@ namespace mg5amcCpu
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29065,7 +29069,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1196
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29085,7 +29089,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29105,7 +29109,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29132,7 +29136,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1197
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29152,7 +29156,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29172,7 +29176,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29192,7 +29196,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29212,7 +29216,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29232,7 +29236,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29252,7 +29256,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29272,7 +29276,7 @@ namespace mg5amcCpu
       jamp_sv[94] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29292,7 +29296,7 @@ namespace mg5amcCpu
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29316,12 +29320,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1198 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1198
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] );
+      helas_VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 1198
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29341,7 +29345,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29361,7 +29365,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29388,7 +29392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1199
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29408,7 +29412,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29428,7 +29432,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29455,7 +29459,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1200
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29467,7 +29471,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29479,7 +29483,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29498,7 +29502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1201
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29506,7 +29510,7 @@ namespace mg5amcCpu
       jamp_sv[79] -= amp_sv[0];
       jamp_sv[81] -= amp_sv[0];
       jamp_sv[83] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29514,7 +29518,7 @@ namespace mg5amcCpu
       jamp_sv[80] += amp_sv[0];
       jamp_sv[81] -= amp_sv[0];
       jamp_sv[82] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29529,7 +29533,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1202
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29541,7 +29545,7 @@ namespace mg5amcCpu
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29553,7 +29557,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29572,7 +29576,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1203
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29580,7 +29584,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[66] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29588,7 +29592,7 @@ namespace mg5amcCpu
       jamp_sv[52] += amp_sv[0];
       jamp_sv[66] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29600,15 +29604,15 @@ namespace mg5amcCpu
       // *** DIAGRAM 1204 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1204
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] );
+      helas_FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
 
       // Amplitude(s) for diagram number 1204
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29616,7 +29620,7 @@ namespace mg5amcCpu
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[35] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29624,7 +29628,7 @@ namespace mg5amcCpu
       jamp_sv[32] += amp_sv[0];
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[34] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29636,12 +29640,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1205 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1205
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 1205
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29653,7 +29657,7 @@ namespace mg5amcCpu
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29665,7 +29669,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29684,7 +29688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1206
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29692,7 +29696,7 @@ namespace mg5amcCpu
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[40] -= amp_sv[0];
       jamp_sv[46] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29700,7 +29704,7 @@ namespace mg5amcCpu
       jamp_sv[37] += amp_sv[0];
       jamp_sv[40] -= amp_sv[0];
       jamp_sv[43] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29712,12 +29716,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1207 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1207
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
 
       // Amplitude(s) for diagram number 1207
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29725,7 +29729,7 @@ namespace mg5amcCpu
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29733,7 +29737,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29748,7 +29752,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1208
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29760,7 +29764,7 @@ namespace mg5amcCpu
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29772,7 +29776,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29791,7 +29795,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1209
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29799,7 +29803,7 @@ namespace mg5amcCpu
       jamp_sv[53] -= amp_sv[0];
       jamp_sv[64] -= amp_sv[0];
       jamp_sv[70] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29807,7 +29811,7 @@ namespace mg5amcCpu
       jamp_sv[61] += amp_sv[0];
       jamp_sv[64] -= amp_sv[0];
       jamp_sv[67] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29822,7 +29826,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1210
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29834,7 +29838,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29846,7 +29850,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29865,7 +29869,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1211
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29877,7 +29881,7 @@ namespace mg5amcCpu
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29889,7 +29893,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29908,7 +29912,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1212
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29928,7 +29932,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29948,7 +29952,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29975,7 +29979,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1213
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29995,7 +29999,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30015,7 +30019,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30035,7 +30039,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30055,7 +30059,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30075,7 +30079,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30095,7 +30099,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30115,7 +30119,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30135,7 +30139,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30159,12 +30163,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1214 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1214
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] );
+      helas_VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 1214
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30184,7 +30188,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30204,7 +30208,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30231,7 +30235,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1215
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30251,7 +30255,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30271,7 +30275,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30298,7 +30302,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1216
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30310,7 +30314,7 @@ namespace mg5amcCpu
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30322,7 +30326,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30341,7 +30345,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1217
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30349,7 +30353,7 @@ namespace mg5amcCpu
       jamp_sv[55] -= amp_sv[0];
       jamp_sv[57] -= amp_sv[0];
       jamp_sv[59] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30357,7 +30361,7 @@ namespace mg5amcCpu
       jamp_sv[56] += amp_sv[0];
       jamp_sv[57] -= amp_sv[0];
       jamp_sv[58] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30372,7 +30376,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1218
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30384,7 +30388,7 @@ namespace mg5amcCpu
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30396,7 +30400,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30415,7 +30419,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1219
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30423,7 +30427,7 @@ namespace mg5amcCpu
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30431,7 +30435,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30446,7 +30450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1220
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30466,7 +30470,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30486,7 +30490,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30506,7 +30510,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30526,7 +30530,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30546,7 +30550,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30566,7 +30570,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30586,7 +30590,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30606,7 +30610,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30630,12 +30634,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1221 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1221
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] );
+      helas_VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 1221
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30655,7 +30659,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30675,7 +30679,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30702,7 +30706,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1222
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30722,7 +30726,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30742,7 +30746,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30769,7 +30773,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1223
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30781,7 +30785,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30793,7 +30797,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30812,7 +30816,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1224
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30820,7 +30824,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30828,7 +30832,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30843,7 +30847,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1225
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30855,7 +30859,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30867,7 +30871,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30886,7 +30890,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1226
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30894,7 +30898,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= amp_sv[0];
       jamp_sv[62] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30902,7 +30906,7 @@ namespace mg5amcCpu
       jamp_sv[56] += amp_sv[0];
       jamp_sv[62] -= amp_sv[0];
       jamp_sv[80] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30917,7 +30921,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1227
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30937,7 +30941,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30957,7 +30961,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30977,7 +30981,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30997,7 +31001,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31017,7 +31021,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31037,7 +31041,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31057,7 +31061,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31077,7 +31081,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31101,12 +31105,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1228 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1228
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] );
+      helas_VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] );
+      helas_VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] );
 
       // Amplitude(s) for diagram number 1228
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31126,7 +31130,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31146,7 +31150,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31173,7 +31177,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1229
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31193,7 +31197,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31213,7 +31217,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31240,7 +31244,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1230
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31252,7 +31256,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31264,7 +31268,7 @@ namespace mg5amcCpu
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31283,7 +31287,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1231
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31291,7 +31295,7 @@ namespace mg5amcCpu
       jamp_sv[73] -= amp_sv[0];
       jamp_sv[75] -= amp_sv[0];
       jamp_sv[77] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31299,7 +31303,7 @@ namespace mg5amcCpu
       jamp_sv[74] += amp_sv[0];
       jamp_sv[75] -= amp_sv[0];
       jamp_sv[76] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31314,7 +31318,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1232
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31326,7 +31330,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31338,7 +31342,7 @@ namespace mg5amcCpu
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31357,7 +31361,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1233
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31365,7 +31369,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= amp_sv[0];
       jamp_sv[68] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31373,7 +31377,7 @@ namespace mg5amcCpu
       jamp_sv[58] += amp_sv[0];
       jamp_sv[68] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31388,7 +31392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1234
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31408,7 +31412,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31428,7 +31432,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31448,7 +31452,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31468,7 +31472,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31488,7 +31492,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31508,7 +31512,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31528,7 +31532,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31548,7 +31552,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31572,12 +31576,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1235 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1235
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] );
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] );
+      helas_VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] );
+      helas_VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] );
 
       // Amplitude(s) for diagram number 1235
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31597,7 +31601,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31617,7 +31621,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31644,7 +31648,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1236
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31664,7 +31668,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31684,7 +31688,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31711,7 +31715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1237
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31723,7 +31727,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31735,7 +31739,7 @@ namespace mg5amcCpu
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31754,7 +31758,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1238
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31762,7 +31766,7 @@ namespace mg5amcCpu
       jamp_sv[49] -= amp_sv[0];
       jamp_sv[51] -= amp_sv[0];
       jamp_sv[53] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31770,7 +31774,7 @@ namespace mg5amcCpu
       jamp_sv[50] += amp_sv[0];
       jamp_sv[51] -= amp_sv[0];
       jamp_sv[52] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31785,7 +31789,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1239
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31797,7 +31801,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31809,7 +31813,7 @@ namespace mg5amcCpu
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31828,7 +31832,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1240
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31836,7 +31840,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31844,7 +31848,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h
index bcf4333c78..24e8114e3a 100644
--- a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h
@@ -1400,8 +1400,189 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1_0 linker_VVV1_0
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVVV1_0 linker_VVVV1_0
+#define helas_VVVV1P0_1 linker_VVVV1P0_1
+#define helas_VVVV3_0 linker_VVVV3_0
+#define helas_VVVV3P0_1 linker_VVVV3P0_1
+#define helas_VVVV4_0 linker_VVVV4_0
+#define helas_VVVV4P0_1 linker_VVVV4P0_1
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_config.mk b/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuConfig.h
index ad528bf8f3..5d79a575e7 100644
--- a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps): inlining and separate-object linking of HelAmps are mutually exclusive build modes
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
index cb97eb9e35..a5d8f488eb 100644
--- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
@@ -61,7 +61,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005686521530151367 [0m
+[1;32mDEBUG: model prefixing  takes 0.005703926086425781 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.076 s
+8 processes with 40 diagrams generated in 0.080 s
 Total: 8 processes with 40 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -200,8 +200,8 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~
 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Creating files in directory P1_gu_ttxu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9e8f1798b0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fead16eab20> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -217,12 +217,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxu 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P1_gux_ttxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9e8efaa1c0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fead151d370> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -238,19 +238,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1545][0m [0m
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s
-Wrote files for 32 helas calls in 0.249 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1614][0m [0m
+Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s
+Wrote files for 32 helas calls in 0.258 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.146 s
+ALOHA: aloha creates 2 routines in  0.152 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.133 s
+ALOHA: aloha creates 4 routines in  0.136 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -258,6 +258,8 @@ ALOHA: aloha creates 4 routines in  0.133 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h
@@ -281,20 +283,14 @@ Hunk #1 succeeded at 528 (offset 44 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 162 (offset 19 lines).
-Hunk #3 succeeded at 247 (offset 26 lines).
-Hunk #4 succeeded at 281 (offset 32 lines).
-Hunk #5 succeeded at 326 (offset 32 lines).
+Hunk #2 succeeded at 246 (offset 26 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 528 (offset 44 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 162 (offset 19 lines).
-Hunk #3 succeeded at 247 (offset 26 lines).
-Hunk #4 succeeded at 281 (offset 32 lines).
-Hunk #5 succeeded at 326 (offset 32 lines).
+Hunk #2 succeeded at 246 (offset 26 lines).
 [1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done.
 Type "launch" to generate events from this process, or see
@@ -302,10 +298,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.389s
-user	0m1.964s
-sys	0m0.295s
-Code generation completed in 3 seconds
+real	0m2.833s
+user	0m2.015s
+sys	0m0.320s
+Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
index ff05145462..1eba0f4747 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -333,11 +337,11 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -348,11 +352,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -363,10 +367,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -377,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -394,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
index c7ecdbf5bf..e6590d28ce 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -333,11 +337,11 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -348,11 +352,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -363,10 +367,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -377,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -394,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L) # HELINL=L: compile HelAmps as separate object files and link them in
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h
index 81f8722a61..496d8197a9 100644
--- a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h
@@ -1073,8 +1073,88 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVV1_0 linker_VVV1_0
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/gq_ttq.mad/src/cudacpp_config.mk b/epochX/cudacpp/gq_ttq.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gq_ttq.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gq_ttq.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps): MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive (neither is the default)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
index 1548b0cef5..269301a91d 100644
--- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
@@ -61,7 +61,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005625486373901367 [0m
+[1;32mDEBUG: model prefixing  takes 0.0057561397552490234 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.077 s
+8 processes with 40 diagrams generated in 0.081 s
 Total: 8 processes with 40 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq
 Load PLUGIN.CUDACPP_OUTPUT
@@ -210,11 +210,11 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s
+Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.144 s
+ALOHA: aloha creates 2 routines in  0.149 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -222,6 +222,8 @@ ALOHA: aloha creates 2 routines in  0.144 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h
@@ -230,7 +232,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
 quit
 
-real	0m0.659s
-user	0m0.597s
-sys	0m0.049s
+real	0m0.680s
+user	0m0.618s
+sys	0m0.056s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc
index 52546a6c88..fd5a6dd91d 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -333,11 +337,11 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -347,11 +351,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -361,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -374,10 +378,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -390,7 +394,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc
index 1469f8bbb3..6393fe7844 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -333,11 +337,11 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -347,11 +351,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -361,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -374,10 +378,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -390,7 +394,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L) # HELINL=L: compile HelAmps as separate object files and link them in
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h
index 81f8722a61..496d8197a9 100644
--- a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h
@@ -1073,8 +1073,88 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVV1_0 linker_VVV1_0
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/gq_ttq.sa/src/cudacpp_config.mk b/epochX/cudacpp/gq_ttq.sa/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/gq_ttq.sa/src/cudacpp_config.mk
+++ b/epochX/cudacpp/gq_ttq.sa/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuConfig.h
index ad528bf8f3..5d79a575e7 100644
--- a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps): MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
index d530a89960..c7dccb0dd3 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
@@ -149,8 +149,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Creating files in directory P1_gg_bbx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fa44dca6fa0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fb17051fc70> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -166,23 +166,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_bbx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses/P1_gg_bbx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1614][0m [0m
 Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s
-Wrote files for 12 helas calls in 0.119 s
+Wrote files for 12 helas calls in 0.123 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.262 s
+ALOHA: aloha creates 4 routines in  0.272 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 8 routines in  0.249 s
+ALOHA: aloha creates 8 routines in  0.255 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -191,6 +191,8 @@ ALOHA: aloha creates 8 routines in  0.249 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFS2
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h
 INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h
@@ -219,10 +221,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.154s
-user	0m1.883s
-sys	0m0.276s
-Code generation completed in 3 seconds
+real	0m2.226s
+user	0m1.956s
+sys	0m0.270s
+Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc
index da4bbb8411..538b157bc0 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      VVS3_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, cIPD[1], cIPD[2], w_fp[4] );
+      helas_VVS3_3( w_fp[0], w_fp[1], COUPs[0], 1.0, cIPD[1], cIPD[2], w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFS2_0<W_ACCESS, A_ACCESS, CI_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
+      helas_FFS2_0( w_fp[3], w_fp[2], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -341,10 +345,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 4 ***
 
       // Wavefunction(s) for diagram number 2
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -355,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 4 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -368,10 +372,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 4 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[2], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/check_sa.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/check_sa.cc
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L) # HELINL=L: build HelAmps as separate object files and link them
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h
index 210b67369c..adae1c7f83 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h
+++ b/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h
@@ -1116,8 +1116,102 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVS3_3 VVS3_3<W_ACCESS, CD_ACCESS>
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFS2_0 FFS2_0<W_ACCESS, A_ACCESS, CI_ACCESS> // called with the independent coupling COUPs[ndcoup + 0]: keep CI_ACCESS as in the template call it replaces
+
+#else
+
+#define helas_VVS3_3 linker_VVS3_3
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFS2_0 linker_FFS2_0
+
   //--------------------------------------------------------------------------
 
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6]
+  __device__ void
+  linker_VVS3_3( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allS3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6]
+  __device__ void
+  linker_FFS2_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_heft_H
diff --git a/epochX/cudacpp/heft_gg_bb.mad/src/cudacpp_config.mk b/epochX/cudacpp/heft_gg_bb.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/heft_gg_bb.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps): inlining and separate-object linking of HelAmps are mutually exclusive
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
index 14cb5a6988..12eb55167f 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
@@ -156,7 +156,7 @@ ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.278 s
+ALOHA: aloha creates 4 routines in  0.275 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -165,6 +165,8 @@ ALOHA: aloha creates 4 routines in  0.278 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFS2
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h
 INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h
@@ -173,7 +175,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
 quit
 
-real	0m0.756s
-user	0m0.610s
-sys	0m0.064s
+real	0m0.668s
+user	0m0.605s
+sys	0m0.058s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc
index 0d14a736e9..437dbf6116 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc
+++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      VVS3_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, cIPD[1], cIPD[2], w_fp[4] );
+      helas_VVS3_3( w_fp[0], w_fp[1], COUPs[0], 1.0, cIPD[1], cIPD[2], w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFS2_0<W_ACCESS, A_ACCESS, CI_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
+      helas_FFS2_0( w_fp[3], w_fp[2], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -340,10 +344,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 4 ***
 
       // Wavefunction(s) for diagram number 2
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -353,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 4 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -365,10 +369,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 4 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[2], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/check_sa.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/check_sa.cc
+++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h
index 210b67369c..adae1c7f83 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h
+++ b/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h
@@ -1116,8 +1116,102 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVS3_3 VVS3_3<W_ACCESS, CD_ACCESS>
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFS2_0 FFS2_0<W_ACCESS, A_ACCESS, CI_ACCESS> // CI_ACCESS (not CD_ACCESS): the replaced call passes the independent coupling COUPs[ndcoup + 0]
+
+#else
+
+#define helas_VVS3_3 linker_VVS3_3
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFS2_0 linker_FFS2_0
+
   //--------------------------------------------------------------------------
 
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6]
+  __device__ void
+  linker_VVS3_3( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allS3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6]
+  __device__ void
+  linker_FFS2_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_heft_H
diff --git a/epochX/cudacpp/heft_gg_bb.sa/src/cudacpp_config.mk b/epochX/cudacpp/heft_gg_bb.sa/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/src/cudacpp_config.mk
+++ b/epochX/cudacpp/heft_gg_bb.sa/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuConfig.h
index ad528bf8f3..5d79a575e7 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps): inlining and separate-object linking of HelAmps are mutually exclusive
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
index c6b7a90b66..610871ee77 100644
--- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
+++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
@@ -61,7 +61,7 @@ set zerowidth_tchannel F
 define j = p
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.00522923469543457 [0m
+[1;32mDEBUG: model prefixing  takes 0.005677700042724609 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~
 INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ 
 INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ 
 INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ 
-5 processes with 7 diagrams generated in 0.029 s
+5 processes with 7 diagrams generated in 0.030 s
 Total: 5 processes with 7 diagrams
 add process p p > t t~ j @1
 INFO: Checking for minimal orders which gives processes. 
@@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~
 INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g 
 INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ 
 INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g 
-13 processes with 76 diagrams generated in 0.135 s
+13 processes with 76 diagrams generated in 0.142 s
 Total: 18 processes with 83 diagrams
 add process p p > t t~ j j @2
 INFO: Checking for minimal orders which gives processes. 
@@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~
 INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ 
 INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ 
 INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.855 s
+65 processes with 1119 diagrams generated in 1.868 s
 Total: 83 processes with 1202 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -499,8 +499,8 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED
 INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 
 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 
 INFO: Creating files in directory P2_gg_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323a3f700> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000cf7700> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -516,12 +516,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_gg_ttxuux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd3240d87c0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013acf10> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -537,12 +537,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_gu_ttxgu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323ce7d00> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013acf10> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -558,12 +558,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_gux_ttxgux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd3240d87c0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2001001fa0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -579,12 +579,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_uux_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323e3e400> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013acf10> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -600,12 +600,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P1_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323a3f700> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013acf10> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -621,12 +621,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_uu_ttxuu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323a3f700> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000f93a60> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -642,12 +642,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_uux_ttxuux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323d90850> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013fb3a0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -663,12 +663,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_uxux_ttxuxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323d3cf70> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000ff4940> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -684,12 +684,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_uc_ttxuc 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd3241e25b0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000fc8b20> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -705,12 +705,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_uux_ttxccx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd3241e25b0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013894f0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -726,12 +726,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_ucx_ttxucx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd3241e25b0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000fe1a60> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -747,12 +747,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P2_uxcx_ttxuxcx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323d3ce20> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000e099d0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -768,12 +768,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P1_gu_ttxu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd3241e25b0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013910d0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -789,12 +789,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxu 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P1_gux_ttxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323e3e490> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000fc7430> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -810,12 +810,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P1_uux_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323e3e490> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000f93b80> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -831,12 +831,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P0_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323ce4640> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000fd3040> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -852,12 +852,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1614][0m [0m
 INFO: Creating files in directory P0_uux_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd323d4b880> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000f93b80> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -873,25 +873,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1545][0m [0m
-Generated helas calls for 18 subprocesses (372 diagrams) in 1.293 s
-Wrote files for 810 helas calls in 3.534 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1614][0m [0m
+Generated helas calls for 18 subprocesses (372 diagrams) in 1.332 s
+Wrote files for 810 helas calls in 3.619 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.335 s
+ALOHA: aloha creates 5 routines in  0.350 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.315 s
+ALOHA: aloha creates 10 routines in  0.327 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -906,6 +906,8 @@ ALOHA: aloha creates 10 routines in  0.315 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h
@@ -933,166 +935,115 @@ Hunk #1 succeeded at 539 (offset 55 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 146 (offset 3 lines).
-Hunk #3 succeeded at 224 (offset 3 lines).
-Hunk #4 succeeded at 252 (offset 3 lines).
-Hunk #5 succeeded at 297 (offset 3 lines).
+Hunk #2 succeeded at 223 (offset 3 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-Hunk #2 succeeded at 159 (offset 16 lines).
-Hunk #3 succeeded at 237 (offset 16 lines).
-Hunk #4 succeeded at 265 (offset 16 lines).
-Hunk #5 succeeded at 310 (offset 16 lines).
+Hunk #2 succeeded at 236 (offset 16 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 528 (offset 44 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 162 (offset 19 lines).
-Hunk #3 succeeded at 240 (offset 19 lines).
-Hunk #4 succeeded at 268 (offset 19 lines).
-Hunk #5 succeeded at 313 (offset 19 lines).
+Hunk #2 succeeded at 239 (offset 19 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 528 (offset 44 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 162 (offset 19 lines).
-Hunk #3 succeeded at 240 (offset 19 lines).
-Hunk #4 succeeded at 268 (offset 19 lines).
-Hunk #5 succeeded at 313 (offset 19 lines).
+Hunk #2 succeeded at 239 (offset 19 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 539 (offset 55 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 162 (offset 19 lines).
-Hunk #3 succeeded at 240 (offset 19 lines).
-Hunk #4 succeeded at 268 (offset 19 lines).
-Hunk #5 succeeded at 313 (offset 19 lines).
+Hunk #2 succeeded at 239 (offset 19 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-Hunk #2 succeeded at 191 (offset 48 lines).
-Hunk #3 succeeded at 269 (offset 48 lines).
-Hunk #4 succeeded at 297 (offset 48 lines).
-Hunk #5 succeeded at 342 (offset 48 lines).
+Hunk #2 succeeded at 268 (offset 48 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 517 (offset 33 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 194 (offset 51 lines).
-Hunk #3 succeeded at 272 (offset 51 lines).
-Hunk #4 succeeded at 300 (offset 51 lines).
-Hunk #5 succeeded at 345 (offset 51 lines).
+Hunk #2 succeeded at 271 (offset 51 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 528 (offset 44 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 194 (offset 51 lines).
-Hunk #3 succeeded at 272 (offset 51 lines).
-Hunk #4 succeeded at 300 (offset 51 lines).
-Hunk #5 succeeded at 345 (offset 51 lines).
+Hunk #2 succeeded at 271 (offset 51 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 528 (offset 44 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 194 (offset 51 lines).
-Hunk #3 succeeded at 272 (offset 51 lines).
-Hunk #4 succeeded at 300 (offset 51 lines).
-Hunk #5 succeeded at 345 (offset 51 lines).
+Hunk #2 succeeded at 271 (offset 51 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 555 (offset 71 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 77 (offset 5 lines).
-Hunk #2 succeeded at 196 (offset 53 lines).
-Hunk #3 succeeded at 274 (offset 53 lines).
-Hunk #4 succeeded at 302 (offset 53 lines).
-Hunk #5 succeeded at 347 (offset 53 lines).
+Hunk #2 succeeded at 273 (offset 53 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 627 (offset 143 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 83 (offset 11 lines).
-Hunk #2 succeeded at 202 (offset 59 lines).
-Hunk #3 succeeded at 280 (offset 59 lines).
-Hunk #4 succeeded at 308 (offset 59 lines).
-Hunk #5 succeeded at 353 (offset 59 lines).
+Hunk #2 succeeded at 279 (offset 59 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 539 (offset 55 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 194 (offset 51 lines).
-Hunk #3 succeeded at 272 (offset 51 lines).
-Hunk #4 succeeded at 300 (offset 51 lines).
-Hunk #5 succeeded at 345 (offset 51 lines).
+Hunk #2 succeeded at 271 (offset 51 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 627 (offset 143 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 83 (offset 11 lines).
-Hunk #2 succeeded at 202 (offset 59 lines).
-Hunk #3 succeeded at 280 (offset 59 lines).
-Hunk #4 succeeded at 308 (offset 59 lines).
-Hunk #5 succeeded at 353 (offset 59 lines).
+Hunk #2 succeeded at 279 (offset 59 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 539 (offset 55 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 194 (offset 51 lines).
-Hunk #3 succeeded at 272 (offset 51 lines).
-Hunk #4 succeeded at 300 (offset 51 lines).
-Hunk #5 succeeded at 345 (offset 51 lines).
+Hunk #2 succeeded at 271 (offset 51 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 539 (offset 55 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 194 (offset 51 lines).
-Hunk #3 succeeded at 272 (offset 51 lines).
-Hunk #4 succeeded at 300 (offset 51 lines).
-Hunk #5 succeeded at 345 (offset 51 lines).
+Hunk #2 succeeded at 271 (offset 51 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 555 (offset 71 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 77 (offset 5 lines).
-Hunk #2 succeeded at 196 (offset 53 lines).
-Hunk #3 succeeded at 274 (offset 53 lines).
-Hunk #4 succeeded at 302 (offset 53 lines).
-Hunk #5 succeeded at 347 (offset 53 lines).
+Hunk #2 succeeded at 273 (offset 53 lines).
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 Hunk #1 succeeded at 539 (offset 55 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
-Hunk #2 succeeded at 194 (offset 51 lines).
-Hunk #3 succeeded at 272 (offset 51 lines).
-Hunk #4 succeeded at 300 (offset 51 lines).
-Hunk #5 succeeded at 345 (offset 51 lines).
+Hunk #2 succeeded at 271 (offset 51 lines).
 [1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done.
 Type "launch" to generate events from this process, or see
@@ -1100,9 +1051,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m11.245s
-user	0m10.299s
-sys	0m0.899s
+real	0m11.676s
+user	0m10.606s
+sys	0m0.897s
 Code generation completed in 12 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc
index 7e6595169c..6e4e81d6bf 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -342,10 +346,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -355,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc
index 2a19d6a513..9e3e37be16 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -331,10 +335,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
+      helas_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
index ed7203959e..3fa4e019da 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -330,11 +334,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -347,10 +351,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 16 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -361,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 16 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -375,11 +379,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 16 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -389,10 +393,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 16 ***
 
       // Wavefunction(s) for diagram number 5
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -406,7 +410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -416,11 +420,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 16 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -433,7 +437,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -447,7 +451,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -457,10 +461,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 16 ***
 
       // Wavefunction(s) for diagram number 10
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -474,7 +478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -488,7 +492,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -504,7 +508,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -517,7 +521,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -530,7 +534,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -543,22 +547,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 16 ***
 
       // Wavefunction(s) for diagram number 16
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
index 4af0dad276..b2f73fb903 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -333,11 +337,11 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -348,11 +352,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -363,10 +367,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -377,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -394,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
index 1f516b00c5..51d5ffc4ff 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -333,11 +337,11 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_2( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -348,11 +352,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -363,10 +367,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -377,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -394,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc
index 3a427ed045..09af3b4355 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -333,11 +337,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -348,11 +352,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -363,10 +367,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -377,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[0], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -394,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc
index c97b13d6fe..2f80b93422 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -332,11 +336,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 1
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -345,7 +349,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -354,7 +358,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -367,10 +371,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 123 ***
 
       // Wavefunction(s) for diagram number 2
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
+      helas_VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -387,10 +391,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 123 ***
 
       // Wavefunction(s) for diagram number 3
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -407,10 +411,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 123 ***
 
       // Wavefunction(s) for diagram number 4
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 4
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -427,11 +431,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 123 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -445,7 +449,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -458,10 +462,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 123 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -472,10 +476,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 123 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -489,7 +493,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -502,10 +506,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 123 ***
 
       // Wavefunction(s) for diagram number 10
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -516,10 +520,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 123 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -533,7 +537,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -549,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -563,7 +567,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -579,7 +583,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -595,7 +599,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -608,12 +612,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 123 ***
 
       // Wavefunction(s) for diagram number 17
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 17
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -623,10 +627,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 18 OF 123 ***
 
       // Wavefunction(s) for diagram number 18
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 18
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -639,7 +643,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 19
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -650,11 +654,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 20 OF 123 ***
 
       // Wavefunction(s) for diagram number 20
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
+      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 20
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -670,7 +674,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 21
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -684,7 +688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -695,10 +699,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 23 OF 123 ***
 
       // Wavefunction(s) for diagram number 23
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
+      helas_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
 
       // Amplitude(s) for diagram number 23
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -714,7 +718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 24
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -728,7 +732,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -739,10 +743,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 123 ***
 
       // Wavefunction(s) for diagram number 26
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
+      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -755,7 +759,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -768,7 +772,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 28
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -781,7 +785,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -794,7 +798,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -808,7 +812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 31
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -821,22 +825,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 123 ***
 
       // Wavefunction(s) for diagram number 32
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
+      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
+      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
+      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -845,12 +849,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 33 OF 123 ***
 
       // Wavefunction(s) for diagram number 33
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -860,10 +864,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 34 OF 123 ***
 
       // Wavefunction(s) for diagram number 34
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 34
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -876,7 +880,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -887,10 +891,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 36 OF 123 ***
 
       // Wavefunction(s) for diagram number 36
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
+      helas_FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 36
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -906,7 +910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 37
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -920,7 +924,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -934,7 +938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -950,7 +954,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -964,7 +968,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 41
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -975,10 +979,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 42 OF 123 ***
 
       // Wavefunction(s) for diagram number 42
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 42
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -991,7 +995,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 43
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1004,7 +1008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 44
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1017,7 +1021,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1030,7 +1034,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1044,7 +1048,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 47
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1060,17 +1064,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -1079,11 +1083,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 49 OF 123 ***
 
       // Wavefunction(s) for diagram number 49
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 49
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1094,10 +1098,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 50 OF 123 ***
 
       // Wavefunction(s) for diagram number 50
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 50
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1113,7 +1117,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 51
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1124,10 +1128,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 52 OF 123 ***
 
       // Wavefunction(s) for diagram number 52
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 52
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1141,7 +1145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1157,7 +1161,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 54
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1171,7 +1175,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 55
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1187,7 +1191,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 56
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1203,7 +1207,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1223,7 +1227,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 58
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1232,7 +1236,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1241,7 +1245,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1254,10 +1258,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 123 ***
 
       // Wavefunction(s) for diagram number 59
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 59
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1277,7 +1281,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 60
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1297,7 +1301,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1313,7 +1317,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1327,7 +1331,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 63
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1343,7 +1347,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1354,11 +1358,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 65 OF 123 ***
 
       // Wavefunction(s) for diagram number 65
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 65
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1369,10 +1373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 66 OF 123 ***
 
       // Wavefunction(s) for diagram number 66
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 66
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1388,7 +1392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1399,10 +1403,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 68 OF 123 ***
 
       // Wavefunction(s) for diagram number 68
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 68
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1416,7 +1420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1432,7 +1436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1446,7 +1450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1462,7 +1466,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1478,7 +1482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 73
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1498,7 +1502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 74
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1507,7 +1511,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1516,7 +1520,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1529,10 +1533,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 75 OF 123 ***
 
       // Wavefunction(s) for diagram number 75
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 75
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1552,7 +1556,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 76
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1572,7 +1576,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 77
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1588,7 +1592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 78
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1602,7 +1606,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 79
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1618,7 +1622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 80
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 80 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1629,10 +1633,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 81 OF 123 ***
 
       // Wavefunction(s) for diagram number 81
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 81
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1642,10 +1646,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 82 OF 123 ***
 
       // Wavefunction(s) for diagram number 82
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 82
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1658,7 +1662,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 83
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1668,10 +1672,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 84 OF 123 ***
 
       // Wavefunction(s) for diagram number 84
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 84
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1684,7 +1688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 85
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1695,10 +1699,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 86 OF 123 ***
 
       // Wavefunction(s) for diagram number 86
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 86
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1711,10 +1715,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 87 OF 123 ***
 
       // Wavefunction(s) for diagram number 87
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 87
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1724,10 +1728,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 88 OF 123 ***
 
       // Wavefunction(s) for diagram number 88
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 88
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1740,7 +1744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 89
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1750,10 +1754,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 90 OF 123 ***
 
       // Wavefunction(s) for diagram number 90
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
+      helas_FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
 
       // Amplitude(s) for diagram number 90
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 90 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1766,7 +1770,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 91
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1780,7 +1784,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 92
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1796,7 +1800,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 93
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1805,7 +1809,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1814,7 +1818,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1827,10 +1831,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 94 OF 123 ***
 
       // Wavefunction(s) for diagram number 94
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 94
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1847,10 +1851,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 95 OF 123 ***
 
       // Wavefunction(s) for diagram number 95
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 95
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1870,7 +1874,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 96
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1886,7 +1890,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 97
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1900,7 +1904,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 98
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1916,7 +1920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 99
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1930,7 +1934,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 100
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1939,7 +1943,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1948,7 +1952,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1961,10 +1965,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 101 OF 123 ***
 
       // Wavefunction(s) for diagram number 101
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 101
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1984,7 +1988,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 102
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2004,7 +2008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 103
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2020,7 +2024,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 104
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2034,7 +2038,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 105
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2050,7 +2054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 106
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2064,7 +2068,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 107
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2073,7 +2077,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2082,7 +2086,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2098,7 +2102,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 108
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2118,7 +2122,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 109
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2138,7 +2142,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 110
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2151,7 +2155,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 111
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2164,7 +2168,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 112
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2177,7 +2181,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 113
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2187,12 +2191,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 114 OF 123 ***
 
       // Wavefunction(s) for diagram number 114
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 114
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2201,7 +2205,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2210,7 +2214,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2226,17 +2230,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 115
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -2248,17 +2252,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 116
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -2267,12 +2271,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 117 OF 123 ***
 
       // Wavefunction(s) for diagram number 117
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 117
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2281,7 +2285,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2290,7 +2294,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2306,17 +2310,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 118
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -2328,17 +2332,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 119
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -2347,22 +2351,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 120 OF 123 ***
 
       // Wavefunction(s) for diagram number 120
-      VVVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      VVVV3P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
-      VVVV4P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
+      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
 
       // Amplitude(s) for diagram number 120
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -2374,17 +2378,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 121
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -2396,7 +2400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 122
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2405,7 +2409,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2414,7 +2418,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2430,7 +2434,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 123
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2439,7 +2443,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2448,7 +2452,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc
index 77c1c12ab0..7390662111 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -353,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 36 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -369,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 36 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -385,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 36 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -401,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 36 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -417,11 +421,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 36 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -432,11 +436,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 36 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[10] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -447,10 +451,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 36 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -461,10 +465,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 36 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -478,7 +482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -489,11 +493,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 36 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -504,10 +508,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 36 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -521,7 +525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -532,10 +536,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 36 ***
 
       // Wavefunction(s) for diagram number 14
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_2( w_fp[11], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -549,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -560,11 +564,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 36 ***
 
       // Wavefunction(s) for diagram number 16
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -578,7 +582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 17
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -592,7 +596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -603,10 +607,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 36 ***
 
       // Wavefunction(s) for diagram number 19
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 19
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -620,7 +624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -631,11 +635,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 36 ***
 
       // Wavefunction(s) for diagram number 21
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 21
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -649,7 +653,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -663,7 +667,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -674,10 +678,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 36 ***
 
       // Wavefunction(s) for diagram number 24
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 24
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -691,7 +695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -702,10 +706,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 36 ***
 
       // Wavefunction(s) for diagram number 26
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -716,10 +720,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 36 ***
 
       // Wavefunction(s) for diagram number 27
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -730,10 +734,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 36 ***
 
       // Wavefunction(s) for diagram number 28
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 28
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -747,7 +751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -758,10 +762,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 30 OF 36 ***
 
       // Wavefunction(s) for diagram number 30
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 30
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -772,10 +776,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 36 ***
 
       // Wavefunction(s) for diagram number 31
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 31
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -786,10 +790,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 36 ***
 
       // Wavefunction(s) for diagram number 32
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -803,7 +807,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -817,17 +821,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += 1. / 2. * amp_sv[0];
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[9] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[6] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[6] -= 1. / 2. * amp_sv[0];
@@ -839,7 +843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -855,7 +859,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc
index bb8c1fdf98..6c2056c725 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -353,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 36 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -369,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 36 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -385,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 36 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -401,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 36 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -417,11 +421,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 36 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -432,11 +436,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 36 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_1( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -447,10 +451,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 36 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_FFV1_2( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[5], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -461,10 +465,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 36 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -478,7 +482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -489,11 +493,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 36 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -504,10 +508,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 36 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -521,7 +525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[5], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -532,10 +536,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 36 ***
 
       // Wavefunction(s) for diagram number 14
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -549,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -560,11 +564,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 36 ***
 
       // Wavefunction(s) for diagram number 16
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1_1( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -578,7 +582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 17
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -592,7 +596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -603,10 +607,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 36 ***
 
       // Wavefunction(s) for diagram number 19
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 19
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -620,7 +624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -631,11 +635,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 36 ***
 
       // Wavefunction(s) for diagram number 21
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1P0_3( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 21
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -649,7 +653,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -663,7 +667,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -674,10 +678,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 36 ***
 
       // Wavefunction(s) for diagram number 24
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 24
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -691,7 +695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -702,10 +706,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 36 ***
 
       // Wavefunction(s) for diagram number 26
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -716,10 +720,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 36 ***
 
       // Wavefunction(s) for diagram number 27
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -730,10 +734,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 36 ***
 
       // Wavefunction(s) for diagram number 28
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 28
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -747,7 +751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -758,10 +762,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 30 OF 36 ***
 
       // Wavefunction(s) for diagram number 30
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 30
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -772,10 +776,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 36 ***
 
       // Wavefunction(s) for diagram number 31
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 31
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -786,10 +790,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 36 ***
 
       // Wavefunction(s) for diagram number 32
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -803,7 +807,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -817,17 +821,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[5] += 1. / 2. * amp_sv[0];
       jamp_sv[8] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += 1. / 2. * amp_sv[0];
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
       jamp_sv[8] -= 1. / 2. * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
@@ -839,7 +843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -855,7 +859,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc
index 53460aae19..185b8b5c09 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -353,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 36 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -369,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 36 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -385,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 36 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -401,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 36 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -417,11 +421,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 36 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -432,11 +436,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 36 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -447,10 +451,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 36 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_FFV1_2( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -461,10 +465,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 36 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -478,7 +482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -489,11 +493,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 36 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -504,10 +508,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 36 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -521,7 +525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -532,10 +536,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 36 ***
 
       // Wavefunction(s) for diagram number 14
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -549,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -560,11 +564,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 36 ***
 
       // Wavefunction(s) for diagram number 16
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -578,7 +582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 17
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -592,7 +596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -603,10 +607,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 36 ***
 
       // Wavefunction(s) for diagram number 19
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 19
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -620,7 +624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -631,11 +635,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 36 ***
 
       // Wavefunction(s) for diagram number 21
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 21
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -649,7 +653,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -663,7 +667,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -674,10 +678,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 36 ***
 
       // Wavefunction(s) for diagram number 24
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 24
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -691,7 +695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -702,10 +706,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 36 ***
 
       // Wavefunction(s) for diagram number 26
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -716,10 +720,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 36 ***
 
       // Wavefunction(s) for diagram number 27
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -730,10 +734,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 36 ***
 
       // Wavefunction(s) for diagram number 28
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 28
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -747,7 +751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -758,10 +762,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 30 OF 36 ***
 
       // Wavefunction(s) for diagram number 30
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 30
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -772,10 +776,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 36 ***
 
       // Wavefunction(s) for diagram number 31
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 31
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -786,10 +790,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 36 ***
 
       // Wavefunction(s) for diagram number 32
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -803,7 +807,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -817,17 +821,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
       jamp_sv[8] -= 1. / 2. * amp_sv[0];
       jamp_sv[11] += 1. / 2. * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= 1. / 2. * amp_sv[0];
       jamp_sv[3] -= 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
       jamp_sv[11] += 1. / 2. * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= 1. / 2. * amp_sv[0];
       jamp_sv[3] -= 1. / 2. * amp_sv[0];
       jamp_sv[4] += 1. / 2. * amp_sv[0];
@@ -839,7 +843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -855,7 +859,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc
index 06730db834..676ae8fe28 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc
@@ -209,7 +209,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -221,7 +223,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -337,12 +341,12 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -355,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 7 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -371,10 +375,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 7 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -385,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 7 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -401,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 7 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -417,10 +421,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 7 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -433,10 +437,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 7 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc
index e92a990601..fb39071e9a 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc
@@ -215,7 +215,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -227,7 +229,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -343,12 +347,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -361,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 7 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -377,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 7 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -391,10 +395,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 7 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -407,10 +411,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 7 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -423,10 +427,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 7 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -439,10 +443,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 7 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc
index 3f6562df2e..f941fbd814 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -353,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 14 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -369,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 14 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -383,10 +387,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 14 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -399,10 +403,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 14 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -415,12 +419,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 14 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_FFV1P0_3( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -433,10 +437,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 14 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -452,7 +456,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -463,10 +467,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 14 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -479,10 +483,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 14 ***
 
       // Wavefunction(s) for diagram number 10
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_1( w_fp[4], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[1], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -495,10 +499,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 14 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -511,10 +515,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 14 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -527,10 +531,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 14 ***
 
       // Wavefunction(s) for diagram number 13
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -546,7 +550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc
index 942339bab4..b96c2a88fa 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc
@@ -215,7 +215,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -227,7 +229,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -343,12 +347,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -361,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 7 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -377,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 7 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -391,10 +395,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 7 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -407,10 +411,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 7 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -423,10 +427,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 7 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -439,10 +443,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 7 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc
index e33137dace..62b6dc7eef 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -353,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 36 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -369,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 36 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -385,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 36 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -401,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 36 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -417,11 +421,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 36 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -432,11 +436,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 36 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[10] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_1( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[0], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -447,10 +451,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 36 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_FFV1_2( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -461,10 +465,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 36 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -478,7 +482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -489,11 +493,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 36 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -504,10 +508,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 36 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[0], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -521,7 +525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -532,10 +536,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 36 ***
 
       // Wavefunction(s) for diagram number 14
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[11], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_2( w_fp[11], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -549,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -560,11 +564,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 36 ***
 
       // Wavefunction(s) for diagram number 16
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -578,7 +582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 17
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -592,7 +596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -603,10 +607,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 36 ***
 
       // Wavefunction(s) for diagram number 19
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 19
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[0], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -620,7 +624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -631,11 +635,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 36 ***
 
       // Wavefunction(s) for diagram number 21
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 21
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -649,7 +653,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -663,7 +667,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -674,10 +678,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 36 ***
 
       // Wavefunction(s) for diagram number 24
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_2( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 24
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -691,7 +695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -702,10 +706,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 36 ***
 
       // Wavefunction(s) for diagram number 26
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[13], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_FFV1_1( w_fp[13], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -716,10 +720,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 36 ***
 
       // Wavefunction(s) for diagram number 27
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_VVV1P0_1( w_fp[4], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -730,10 +734,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 36 ***
 
       // Wavefunction(s) for diagram number 28
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_2( w_fp[6], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 28
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -747,7 +751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -758,10 +762,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 30 OF 36 ***
 
       // Wavefunction(s) for diagram number 30
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_1( w_fp[10], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 30
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[0], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -772,10 +776,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 36 ***
 
       // Wavefunction(s) for diagram number 31
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_VVV1P0_1( w_fp[4], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 31
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -786,10 +790,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 36 ***
 
       // Wavefunction(s) for diagram number 32
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -803,7 +807,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -817,17 +821,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[3] += 1. / 2. * amp_sv[0];
       jamp_sv[4] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
-      VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV3_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[9] += 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV4_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= 1. / 2. * amp_sv[0];
       jamp_sv[4] -= 1. / 2. * amp_sv[0];
       jamp_sv[9] += 1. / 2. * amp_sv[0];
@@ -839,7 +843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -855,7 +859,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc
index bfef77f27a..e6ac298d60 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -353,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 14 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -369,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 14 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -383,10 +387,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 14 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -399,10 +403,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 14 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -415,12 +419,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 14 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -433,10 +437,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 14 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -452,7 +456,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -463,10 +467,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 14 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -479,10 +483,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 14 ***
 
       // Wavefunction(s) for diagram number 10
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_1( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -495,10 +499,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 14 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -511,10 +515,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 14 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -527,10 +531,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 14 ***
 
       // Wavefunction(s) for diagram number 13
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -546,7 +550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc
index ce462b9537..70e56ccbdf 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc
@@ -209,7 +209,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -221,7 +223,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -337,12 +341,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -355,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 7 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -371,10 +375,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 7 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -385,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 7 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -401,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 7 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -417,10 +421,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 7 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -433,10 +437,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 7 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[0], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc
index 66c50546c0..ff48e18cda 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -353,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 14 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -369,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 14 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -383,10 +387,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 14 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -399,10 +403,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 14 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -415,12 +419,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 14 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -433,10 +437,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 14 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -452,7 +456,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -463,10 +467,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 14 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -479,10 +483,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 14 ***
 
       // Wavefunction(s) for diagram number 10
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_1( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -495,10 +499,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 14 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -511,10 +515,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 14 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -527,10 +531,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 14 ***
 
       // Wavefunction(s) for diagram number 13
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -546,7 +550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[0], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[10], w_fp[0], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h
index bcf4333c78..24e8114e3a 100644
--- a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h
@@ -1400,8 +1400,189 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1_0 linker_VVV1_0
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVVV1_0 linker_VVVV1_0
+#define helas_VVVV1P0_1 linker_VVVV1P0_1
+#define helas_VVVV3_0 linker_VVVV3_0
+#define helas_VVVV3P0_1 linker_VVVV3P0_1
+#define helas_VVVV4_0 linker_VVVV4_0
+#define helas_VVVV4P0_1 linker_VVVV4P0_1
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_H
diff --git a/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_config.mk b/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not compile and link them separately, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
index d55f30f145..1d163b2ce7 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
@@ -77,7 +77,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.13804030418395996 [0m
+[1;32mDEBUG: model prefixing  takes 0.14316701889038086 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.673 s
+1 processes with 72 diagrams generated in 3.838 s
 Total: 1 processes with 72 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -114,8 +114,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ t t~ @1 
 INFO: Creating files in directory P1_gg_ttxttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f22e51fafa0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f99ad249100> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -131,25 +131,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses/P1_gg_ttxttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1545][0m [0m
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.185 s
-Wrote files for 119 helas calls in 0.432 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1614][0m [0m
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s
+Wrote files for 119 helas calls in 0.443 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.317 s
+ALOHA: aloha creates 5 routines in  0.327 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 10 routines in  0.333 s
+ALOHA: aloha creates 10 routines in  0.341 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -161,6 +161,8 @@ ALOHA: aloha creates 10 routines in  0.333 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV10
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
 INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -182,10 +184,7 @@ DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_g
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-Hunk #2 succeeded at 191 (offset 48 lines).
-Hunk #3 succeeded at 269 (offset 48 lines).
-Hunk #4 succeeded at 297 (offset 48 lines).
-Hunk #5 succeeded at 342 (offset 48 lines).
+Hunk #2 succeeded at 268 (offset 48 lines).
 [1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt done.
 Type "launch" to generate events from this process, or see
@@ -193,9 +192,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m7.220s
-user	0m6.848s
-sys	0m0.283s
+real	0m7.445s
+user	0m7.100s
+sys	0m0.303s
 Code generation completed in 7 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc
index a8d954b7ff..3a68f91378 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -332,12 +336,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][5], -1, w_fp[5], 5 );
 
-      VVV5P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_VVV5P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -350,10 +354,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 72 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -366,10 +370,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 72 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 3
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -382,11 +386,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 72 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[11] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1P0_3( w_fp[5], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -402,7 +406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -415,10 +419,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 72 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[3], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 6
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[11], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[6], w_fp[11], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -431,10 +435,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 72 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -450,7 +454,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -466,7 +470,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -482,7 +486,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -495,12 +499,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 72 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -514,7 +518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -525,11 +529,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 72 ***
 
       // Wavefunction(s) for diagram number 13
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -543,7 +547,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -554,10 +558,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 15 OF 72 ***
 
       // Wavefunction(s) for diagram number 15
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 15
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -571,7 +575,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -582,10 +586,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 72 ***
 
       // Wavefunction(s) for diagram number 17
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 17
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -599,7 +603,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -613,7 +617,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 19
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -627,7 +631,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -638,12 +642,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 72 ***
 
       // Wavefunction(s) for diagram number 21
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 21
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -657,7 +661,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -668,10 +672,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 23 OF 72 ***
 
       // Wavefunction(s) for diagram number 23
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_FFV1P0_3( w_fp[14], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 23
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -685,7 +689,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 24
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[6], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[6], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -699,7 +703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[4], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -713,7 +717,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -724,10 +728,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 72 ***
 
       // Wavefunction(s) for diagram number 27
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[17], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[17], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -741,7 +745,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 28
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[11], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -755,7 +759,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[17], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[17], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -769,7 +773,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -780,11 +784,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 72 ***
 
       // Wavefunction(s) for diagram number 31
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
+      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 31
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[16], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -795,10 +799,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 72 ***
 
       // Wavefunction(s) for diagram number 32
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -812,7 +816,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -826,7 +830,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[12], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -840,7 +844,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -854,7 +858,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[12], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -865,10 +869,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 37 OF 72 ***
 
       // Wavefunction(s) for diagram number 37
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 37
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[14], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[14], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -882,7 +886,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -896,7 +900,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -910,7 +914,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[11], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -921,11 +925,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 41 OF 72 ***
 
       // Wavefunction(s) for diagram number 41
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[17], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_FFV1P0_3( w_fp[17], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 41
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -939,7 +943,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 42
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[17], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[17], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -950,10 +954,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 43 OF 72 ***
 
       // Wavefunction(s) for diagram number 43
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[17], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[17], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 43
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -967,7 +971,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 44
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -981,7 +985,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -995,7 +999,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[17], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[17], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1006,10 +1010,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 47 OF 72 ***
 
       // Wavefunction(s) for diagram number 47
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[17], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[17], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 47
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1023,7 +1027,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 48 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1037,7 +1041,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 49
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1051,7 +1055,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 50
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1062,10 +1066,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 51 OF 72 ***
 
       // Wavefunction(s) for diagram number 51
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 51
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1076,10 +1080,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 52 OF 72 ***
 
       // Wavefunction(s) for diagram number 52
-      VVV5P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_VVV5P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 52
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[16], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1093,7 +1097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1104,10 +1108,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 54 OF 72 ***
 
       // Wavefunction(s) for diagram number 54
-      VVV5P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_VVV5P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 54
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1118,10 +1122,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 55 OF 72 ***
 
       // Wavefunction(s) for diagram number 55
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 55
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1132,10 +1136,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 56 OF 72 ***
 
       // Wavefunction(s) for diagram number 56
-      VVV5P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[11], COUPs[0], 1.0, 0., 0., w_fp[14] );
+      helas_VVV5P0_1( w_fp[0], w_fp[11], COUPs[0], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 56
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1149,7 +1153,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1163,7 +1167,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 58
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 58 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1174,10 +1178,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 72 ***
 
       // Wavefunction(s) for diagram number 59
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_1( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 59
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[13], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[13], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1188,10 +1192,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 60 OF 72 ***
 
       // Wavefunction(s) for diagram number 60
-      VVV5P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_VVV5P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 60
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1205,7 +1209,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1219,7 +1223,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1230,10 +1234,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 63 OF 72 ***
 
       // Wavefunction(s) for diagram number 63
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 63
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1247,7 +1251,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[4], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1261,7 +1265,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 65
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1275,7 +1279,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 66
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1289,17 +1293,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += 1. / 2. * amp_sv[0];
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[9] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV9_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[6] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV10_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[6] -= 1. / 2. * amp_sv[0];
@@ -1311,7 +1315,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 68
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1327,7 +1331,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1343,17 +1347,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= 1. / 2. * amp_sv[0];
       jamp_sv[3] += 1. / 2. * amp_sv[0];
       jamp_sv[8] += 1. / 2. * amp_sv[0];
       jamp_sv[11] -= 1. / 2. * amp_sv[0];
-      VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV9_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= 1. / 2. * amp_sv[0];
       jamp_sv[4] += 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
       jamp_sv[11] -= 1. / 2. * amp_sv[0];
-      VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV10_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= 1. / 2. * amp_sv[0];
       jamp_sv[4] += 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
@@ -1365,7 +1369,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1381,7 +1385,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[11], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/check_sa.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/check_sa.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
index b1af58c440..69e1c0cf7b 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -1253,8 +1253,144 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV5_0 VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVV5P0_1 VVV5P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV9_0 VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV10_0 VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV5_0 linker_VVV5_0
+#define helas_VVV5P0_1 linker_VVV5P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVVV1_0 linker_VVVV1_0
+#define helas_VVVV9_0 linker_VVVV9_0
+#define helas_VVVV10_0 linker_VVVV10_0
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV5_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV5P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV9_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV10_0( const fptype allV1[],
+                   const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allV4[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_SMEFTsim_topU3l_MwScheme_UFO_H
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/src/cudacpp_config.mk b/epochX/cudacpp/smeft_gg_tttt.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
index 4fb7228286..7f7526e856 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
@@ -77,7 +77,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.13859224319458008 [0m
+[1;32mDEBUG: model prefixing  takes 0.14350485801696777 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.821 s
+1 processes with 72 diagrams generated in 3.857 s
 Total: 1 processes with 72 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -115,14 +115,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. 
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.186 s
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.198 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.316 s
+ALOHA: aloha creates 5 routines in  0.329 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -134,6 +134,8 @@ ALOHA: aloha creates 5 routines in  0.316 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV10
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
 INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -142,7 +144,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
 quit
 
-real	0m5.206s
-user	0m5.107s
-sys	0m0.076s
-Code generation completed in 5 seconds
+real	0m5.460s
+user	0m5.214s
+sys	0m0.079s
+Code generation completed in 6 seconds
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc
index c0c2f73b73..2946a93055 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -332,12 +336,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][5], -1, w_fp[5], 5 );
 
-      VVV5P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_VVV5P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -349,10 +353,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 72 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -364,10 +368,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 72 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 3
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -379,11 +383,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 72 ***
 
       // Wavefunction(s) for diagram number 4
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[11] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1P0_3( w_fp[5], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -398,7 +402,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[8], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -410,10 +414,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 72 ***
 
       // Wavefunction(s) for diagram number 6
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_FFV1P0_3( w_fp[3], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 6
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[11], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[6], w_fp[11], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -425,10 +429,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 72 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -443,7 +447,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -458,7 +462,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -473,7 +477,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -485,12 +489,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 72 ***
 
       // Wavefunction(s) for diagram number 11
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -503,7 +507,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -513,11 +517,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 72 ***
 
       // Wavefunction(s) for diagram number 13
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 13
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -530,7 +534,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -540,10 +544,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 15 OF 72 ***
 
       // Wavefunction(s) for diagram number 15
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 15
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -556,7 +560,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -566,10 +570,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 72 ***
 
       // Wavefunction(s) for diagram number 17
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 17
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -582,7 +586,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -595,7 +599,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 19
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -608,7 +612,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -618,12 +622,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 72 ***
 
       // Wavefunction(s) for diagram number 21
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 21
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -636,7 +640,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -646,10 +650,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 23 OF 72 ***
 
       // Wavefunction(s) for diagram number 23
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_FFV1P0_3( w_fp[14], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 23
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -662,7 +666,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 24
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[14], w_fp[6], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[14], w_fp[6], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -675,7 +679,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[4], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -688,7 +692,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 26
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -698,10 +702,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 72 ***
 
       // Wavefunction(s) for diagram number 27
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[14], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
 
       // Amplitude(s) for diagram number 27
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[17], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[17], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -714,7 +718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 28
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[11], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -727,7 +731,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[17], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[17], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -740,7 +744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -750,11 +754,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 72 ***
 
       // Wavefunction(s) for diagram number 31
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[4], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
+      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 31
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[16], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -764,10 +768,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 72 ***
 
       // Wavefunction(s) for diagram number 32
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 32
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -780,7 +784,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -793,7 +797,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[12], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -806,7 +810,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -819,7 +823,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[12], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -829,10 +833,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 37 OF 72 ***
 
       // Wavefunction(s) for diagram number 37
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 37
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[14], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[14], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -845,7 +849,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -858,7 +862,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[14], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -871,7 +875,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[11], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -881,11 +885,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 41 OF 72 ***
 
       // Wavefunction(s) for diagram number 41
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[17], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_FFV1P0_3( w_fp[17], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 41
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -898,7 +902,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 42
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[17], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[17], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -908,10 +912,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 43 OF 72 ***
 
       // Wavefunction(s) for diagram number 43
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[17], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_FFV1P0_3( w_fp[17], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 43
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -924,7 +928,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 44
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -937,7 +941,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -950,7 +954,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[17], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[17], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -960,10 +964,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 47 OF 72 ***
 
       // Wavefunction(s) for diagram number 47
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[17], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_FFV1_2( w_fp[17], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 47
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -976,7 +980,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -989,7 +993,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 49
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1002,7 +1006,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 50
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1012,10 +1016,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 51 OF 72 ***
 
       // Wavefunction(s) for diagram number 51
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_FFV1_1( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 51
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1025,10 +1029,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 52 OF 72 ***
 
       // Wavefunction(s) for diagram number 52
-      VVV5P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_VVV5P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 52
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[16], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1041,7 +1045,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1051,10 +1055,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 54 OF 72 ***
 
       // Wavefunction(s) for diagram number 54
-      VVV5P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_VVV5P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 54
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1064,10 +1068,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 55 OF 72 ***
 
       // Wavefunction(s) for diagram number 55
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 55
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1077,10 +1081,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 56 OF 72 ***
 
       // Wavefunction(s) for diagram number 56
-      VVV5P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[11], COUPs[0], 1.0, 0., 0., w_fp[14] );
+      helas_VVV5P0_1( w_fp[0], w_fp[11], COUPs[0], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 56
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1093,7 +1097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[16], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1106,7 +1110,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 58
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1116,10 +1120,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 72 ***
 
       // Wavefunction(s) for diagram number 59
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_FFV1_1( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 59
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[13], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[13], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1129,10 +1133,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 60 OF 72 ***
 
       // Wavefunction(s) for diagram number 60
-      VVV5P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_VVV5P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 60
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[6], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1145,7 +1149,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1158,7 +1162,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1168,10 +1172,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 63 OF 72 ***
 
       // Wavefunction(s) for diagram number 63
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 63
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1184,7 +1188,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[4], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1197,7 +1201,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 65
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1210,7 +1214,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 66
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1223,7 +1227,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1231,7 +1235,7 @@ namespace mg5amcCpu
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[9] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV9_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1239,7 +1243,7 @@ namespace mg5amcCpu
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[6] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV10_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1254,7 +1258,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 68
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1269,7 +1273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1284,7 +1288,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1292,7 +1296,7 @@ namespace mg5amcCpu
       jamp_sv[3] += 1. / 2. * amp_sv[0];
       jamp_sv[8] += 1. / 2. * amp_sv[0];
       jamp_sv[11] -= 1. / 2. * amp_sv[0];
-      VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV9_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1300,7 +1304,7 @@ namespace mg5amcCpu
       jamp_sv[4] += 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
       jamp_sv[11] -= 1. / 2. * amp_sv[0];
-      VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VVVV10_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1315,7 +1319,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1330,7 +1334,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[11], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/check_sa.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/check_sa.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
index b1af58c440..69e1c0cf7b 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -1253,8 +1253,144 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV5_0 VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVV5P0_1 VVV5P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV9_0 VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VVVV10_0 VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV5_0 linker_VVV5_0
+#define helas_VVV5P0_1 linker_VVV5P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+#define helas_FFV1P0_3 linker_FFV1P0_3
+#define helas_VVVV1_0 linker_VVVV1_0
+#define helas_VVVV9_0 linker_VVVV9_0
+#define helas_VVVV10_0 linker_VVVV10_0
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV5_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV5P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV9_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV10_0( const fptype allV1[],
+                   const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allV4[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_SMEFTsim_topU3l_MwScheme_UFO_H
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_config.mk b/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_config.mk
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuConfig.h
index ad528bf8f3..5d79a575e7 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
index 49e61427c5..c26505d3a3 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.130 s
+1 processes with 6 diagrams generated in 0.128 s
 Total: 1 processes with 6 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t1 t1~ @1 
 INFO: Creating files in directory P1_gg_t1t1x 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f1bae241100> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7ff7c7593a90> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -593,21 +593,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1614][0m [0m
 Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s
-Wrote files for 16 helas calls in 0.126 s
+Wrote files for 16 helas calls in 0.128 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.184 s
+ALOHA: aloha creates 3 routines in  0.190 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 6 routines in  0.182 s
+ALOHA: aloha creates 6 routines in  0.187 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
@@ -615,6 +615,8 @@ ALOHA: aloha creates 6 routines in  0.182 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVSS1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h
@@ -636,10 +638,7 @@ DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-Hunk #2 succeeded at 131 (offset -12 lines).
-Hunk #3 succeeded at 209 (offset -12 lines).
-Hunk #4 succeeded at 237 (offset -12 lines).
-Hunk #5 succeeded at 282 (offset -12 lines).
+Hunk #2 succeeded at 208 (offset -12 lines).
 [1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 done.
 Type "launch" to generate events from this process, or see
@@ -647,10 +646,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.278s
-user	0m2.733s
-sys	0m0.284s
-Code generation completed in 4 seconds
+real	0m3.084s
+user	0m2.771s
+sys	0m0.313s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc
index 8eb9974c35..5185b0d399 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -329,18 +333,18 @@ namespace mg5amcCpu
       sxxxxx<M_ACCESS, W_ACCESS>( momenta, +1, w_fp[3], 3 );
 
       // Amplitude(s) for diagram number 1
-      VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
-      VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
 
       // *** DIAGRAM 2 OF 6 ***
 
       // Wavefunction(s) for diagram number 2
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[3], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VSS1_0( w_fp[4], w_fp[3], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -351,10 +355,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 6 ***
 
       // Wavefunction(s) for diagram number 3
-      VSS1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[2], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_VSS1_2( w_fp[0], w_fp[2], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[3], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -364,10 +368,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 6 ***
 
       // Wavefunction(s) for diagram number 4
-      VSS1_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[2], COUPs[3], -1.0, cIPD[2], cIPD[3], w_fp[4] );
+      helas_VSS1_3( w_fp[0], w_fp[2], COUPs[3], -1.0, cIPD[2], cIPD[3], w_fp[4] );
 
       // Amplitude(s) for diagram number 4
-      VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[3], w_fp[4], COUPs[3], 1.0, &amp_fp[0] );
+      helas_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[3], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -377,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 6 ***
 
       // Wavefunction(s) for diagram number 5
-      VSS1_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[3], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_VSS1_3( w_fp[0], w_fp[3], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 5
-      VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VSS1_0( w_fp[1], w_fp[4], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -390,10 +394,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 6 ***
 
       // Wavefunction(s) for diagram number 6
-      VSS1_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[3], COUPs[3], 1.0, cIPD[2], cIPD[3], w_fp[4] );
+      helas_VSS1_3( w_fp[0], w_fp[3], COUPs[3], 1.0, cIPD[2], cIPD[3], w_fp[4] );
 
       // Amplitude(s) for diagram number 6
-      VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[2], w_fp[4], COUPs[3], -1.0, &amp_fp[0] );
+      helas_VSS1_0( w_fp[1], w_fp[2], w_fp[4], COUPs[3], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/check_sa.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/check_sa.cc
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h
index dc96852e85..ed348cf878 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h
@@ -1073,8 +1073,89 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VSS1_0 VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VSS1_2 VSS1_2<W_ACCESS, CD_ACCESS>
+#define helas_VSS1_3 VSS1_3<W_ACCESS, CD_ACCESS>
+#define helas_VVSS1_0 VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_VSS1_0 linker_VSS1_0
+#define helas_VSS1_2 linker_VSS1_2
+#define helas_VSS1_3 linker_VSS1_3
+#define helas_VVSS1_0 linker_VVSS1_0
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6]
+  __device__ void
+  linker_VSS1_0( const fptype allV1[],
+                 const fptype allS2[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6]
+  __device__ void
+  linker_VSS1_2( const fptype allV1[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allS2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6]
+  __device__ void
+  linker_VSS1_3( const fptype allV1[],
+                 const fptype allS2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allS3[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6]
+  __device__ void
+  linker_VVSS1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allS3[],
+                  const fptype allS4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_MSSM_SLHA2_H
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/cudacpp_config.mk b/epochX/cudacpp/susy_gg_t1t1.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
index 1085728e17..1d876e6d5b 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.124 s
+1 processes with 6 diagrams generated in 0.129 s
 Total: 1 processes with 6 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1
 Load PLUGIN.CUDACPP_OUTPUT
@@ -582,7 +582,7 @@ ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.183 s
+ALOHA: aloha creates 3 routines in  0.190 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
@@ -590,6 +590,8 @@ ALOHA: aloha creates 3 routines in  0.183 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVSS1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h
@@ -598,7 +600,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
 quit
 
-real	0m1.401s
-user	0m1.286s
-sys	0m0.057s
+real	0m1.386s
+user	0m1.297s
+sys	0m0.081s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc
index 3c0dfad317..1b8627679f 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -329,12 +333,12 @@ namespace mg5amcCpu
       sxxxxx<M_ACCESS, W_ACCESS>( momenta, +1, w_fp[3], 3 );
 
       // Amplitude(s) for diagram number 1
-      VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
       jamp_sv[1] += amp_sv[0];
-      VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
+      helas_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -343,10 +347,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 6 ***
 
       // Wavefunction(s) for diagram number 2
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[3], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VSS1_0( w_fp[4], w_fp[3], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -356,10 +360,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 6 ***
 
       // Wavefunction(s) for diagram number 3
-      VSS1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[2], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_VSS1_2( w_fp[0], w_fp[2], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[3], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -368,10 +372,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 6 ***
 
       // Wavefunction(s) for diagram number 4
-      VSS1_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[2], COUPs[3], -1.0, cIPD[2], cIPD[3], w_fp[4] );
+      helas_VSS1_3( w_fp[0], w_fp[2], COUPs[3], -1.0, cIPD[2], cIPD[3], w_fp[4] );
 
       // Amplitude(s) for diagram number 4
-      VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[3], w_fp[4], COUPs[3], 1.0, &amp_fp[0] );
+      helas_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[3], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -380,10 +384,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 6 ***
 
       // Wavefunction(s) for diagram number 5
-      VSS1_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[3], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_VSS1_3( w_fp[0], w_fp[3], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 5
-      VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[4], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
+      helas_VSS1_0( w_fp[1], w_fp[4], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -392,10 +396,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 6 ***
 
       // Wavefunction(s) for diagram number 6
-      VSS1_3<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[3], COUPs[3], 1.0, cIPD[2], cIPD[3], w_fp[4] );
+      helas_VSS1_3( w_fp[0], w_fp[3], COUPs[3], 1.0, cIPD[2], cIPD[3], w_fp[4] );
 
       // Amplitude(s) for diagram number 6
-      VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[2], w_fp[4], COUPs[3], -1.0, &amp_fp[0] );
+      helas_VSS1_0( w_fp[1], w_fp[2], w_fp[4], COUPs[3], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/check_sa.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/check_sa.cc
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h
index dc96852e85..ed348cf878 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h
@@ -1073,8 +1073,89 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_VSS1_0 VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_VSS1_2 VSS1_2<W_ACCESS, CD_ACCESS>
+#define helas_VSS1_3 VSS1_3<W_ACCESS, CD_ACCESS>
+#define helas_VVSS1_0 VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_VSS1_0 linker_VSS1_0
+#define helas_VSS1_2 linker_VSS1_2
+#define helas_VSS1_3 linker_VSS1_3
+#define helas_VVSS1_0 linker_VVSS1_0
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6]
+  __device__ void
+  linker_VSS1_0( const fptype allV1[],
+                 const fptype allS2[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6]
+  __device__ void
+  linker_VSS1_2( const fptype allV1[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allS2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6]
+  __device__ void
+  linker_VSS1_3( const fptype allV1[],
+                 const fptype allS2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allS3[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6]
+  __device__ void
+  linker_VVSS1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allS3[],
+                  const fptype allS4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_MSSM_SLHA2_H
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/cudacpp_config.mk b/epochX/cudacpp/susy_gg_t1t1.sa/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/src/cudacpp_config.mk
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuConfig.h
index ad528bf8f3..5d79a575e7 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
index a1082c61f1..1c56a0ab14 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.119 s
+1 processes with 3 diagrams generated in 0.124 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f4cda57b040> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f933000ec40> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -593,25 +593,27 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1589][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1613][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1614][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.116 s
+Wrote files for 10 helas calls in 0.122 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.139 s
+ALOHA: aloha creates 2 routines in  0.143 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.135 s
+ALOHA: aloha creates 4 routines in  0.137 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h
@@ -640,10 +642,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.872s
-user	0m2.564s
-sys	0m0.301s
-Code generation completed in 2 seconds
+real	0m2.966s
+user	0m2.670s
+sys	0m0.293s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
index ead501d309..fe52e62418 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[1], -1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -342,10 +346,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -355,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[2], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc
+++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h
index 2519e3902b..f9d3884aea 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h
+++ b/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h
@@ -1032,8 +1032,75 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_MSSM_SLHA2_H
diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/cudacpp_config.mk b/epochX/cudacpp/susy_gg_tt.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/susy_gg_tt.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuConfig.h
index 858546db00..cf6a228859 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
index 8479028997..fb28c3393c 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.121 s
+1 processes with 3 diagrams generated in 0.122 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -581,13 +581,15 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.136 s
+ALOHA: aloha creates 2 routines in  0.142 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h
@@ -596,7 +598,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
 quit
 
-real	0m1.278s
-user	0m1.188s
-sys	0m0.072s
+real	0m1.324s
+user	0m1.248s
+sys	0m0.069s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc
index 68cae1dff1..a33ad778ac 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc
@@ -204,7 +204,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -216,7 +218,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -328,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], w_fp[4], COUPs[1], -1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -341,10 +345,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[3], w_fp[4], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -353,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[4], w_fp[2], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
+      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/check_sa.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/check_sa.cc
index d6312eaeeb..39c4a6421b 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/check_sa.cc
+++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/check_sa.cc
@@ -958,6 +958,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk
index 9cff5e1a60..0d8ea00f01 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk
@@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -647,7 +650,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h
index 2519e3902b..f9d3884aea 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h
+++ b/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h
@@ -1032,8 +1032,75 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+
+#else
+
+#define helas_VVV1P0_1 linker_VVV1P0_1
+#define helas_FFV1_0 linker_FFV1_0
+#define helas_FFV1_1 linker_FFV1_1
+#define helas_FFV1_2 linker_FFV1_2
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_MSSM_SLHA2_H
diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/cudacpp_config.mk b/epochX/cudacpp/susy_gg_tt.sa/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/src/cudacpp_config.mk
+++ b/epochX/cudacpp/susy_gg_tt.sa/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuConfig.h
index ad528bf8f3..5d79a575e7 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -78,9 +78,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -147,6 +154,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or defined MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {

From f0a51050d8fd24dad561bb591c50618efc81a567 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 20:55:20 +0200
Subject: [PATCH 22/50] [helas] in tmad madX.sh and teeMadX.sh, add -inlonly
 and -inlLonly options

---
 epochX/cudacpp/tmad/madX.sh    | 35 +++++++++++++++++++++++++---------
 epochX/cudacpp/tmad/teeMadX.sh | 27 +++++++++++++++++---------
 2 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/epochX/cudacpp/tmad/madX.sh b/epochX/cudacpp/tmad/madX.sh
index 6bd9b7daf1..83158f2c5b 100755
--- a/epochX/cudacpp/tmad/madX.sh
+++ b/epochX/cudacpp/tmad/madX.sh
@@ -32,7 +32,7 @@ export CUDACPP_RUNTIME_VECSIZEUSED=${NLOOP}
 
 function usage()
 {
-  echo "Usage: $0 <processes [-eemumu][-ggtt][-ggttg][-ggttgg][-ggttggg][-gguu][-gqttq][-heftggbb][-susyggtt][-susyggt1t1][-smeftggtttt]> [-d] [-fltonly|-mixonly] [-makeonly|-makeclean|-makecleanonly] [-rmrdat] [+10x] [-checkonly] [-nocleanup][-iconfig <iconfig>]" > /dev/stderr
+  echo "Usage: $0 <processes [-eemumu][-ggtt][-ggttg][-ggttgg][-ggttggg][-gguu][-gqttq][-heftggbb][-susyggtt][-susyggt1t1][-smeftggtttt]> [-d] [-fltonly|-mixonly] [-inlonly|-inlLonly] [-makeonly|-makeclean|-makecleanonly] [-rmrdat] [+10x] [-checkonly] [-nocleanup][-iconfig <iconfig>]" > /dev/stderr
   echo "(NB: OMP_NUM_THREADS is taken as-is from the caller's environment)"
   exit 1
 }
@@ -57,6 +57,8 @@ smeftggtttt=0
 
 fptype="d"
 
+helinl="0"
+
 maketype=
 ###makej=
 
@@ -119,6 +121,18 @@ while [ "$1" != "" ]; do
     fi
     fptype="m"
     shift
+  elif [ "$1" == "-inlonly" ]; then
+    if [ "${helinl}" != "0" ] && [ "${helinl}" != "1" ]; then # only a previous -inlLonly conflicts (repeating -inlonly is allowed)
+      echo "ERROR! Options -inlonly and -inlLonly are incompatible"; usage
+    fi
+    helinl="1"
+    shift
+  elif [ "$1" == "-inlLonly" ]; then
+    if [ "${helinl}" != "0" ] && [ "${helinl}" != "L" ]; then # only a previous -inlonly conflicts (repeating -inlLonly is allowed)
+      echo "ERROR! Options -inlonly and -inlLonly are incompatible"; usage
+    fi
+    helinl="L"
+    shift
   elif [ "$1" == "-makeonly" ] || [ "$1" == "-makeclean" ] || [ "$1" == "-makecleanonly" ]; then
     if [ "${maketype}" != "" ] && [ "${maketype}" != "$1" ]; then
       echo "ERROR! Options -makeonly, -makeclean and -makecleanonly are incompatible"; usage
@@ -164,6 +178,9 @@ else
   xsecthr="3E-14"
 fi
 
+# Switch between helinl builds
+export HELINL=$helinl
+
 # Determine the working directory below topdir based on suff, bckend and <process>
 function showdir()
 {
@@ -335,7 +352,7 @@ function runcheck()
   if [ "${cmd/gcheckmax128thr}" != "$cmd" ]; then
     txt="GCHECK(MAX128THR)"
     cmd=${cmd/gcheckmax128thr/check_${backend}} # hack: run cuda/hip check with tput fastest settings
-    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/}
+    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl${helinl}_hrd0\/}
     nblk=$(getgridmax | cut -d ' ' -f1)
     nthr=$(getgridmax | cut -d ' ' -f2)
     while [ $nthr -lt 128 ]; do (( nthr = nthr * 2 )); (( nblk = nblk / 2 )); done
@@ -343,7 +360,7 @@ function runcheck()
   elif [ "${cmd/gcheckmax8thr}" != "$cmd" ]; then
     txt="GCHECK(MAX8THR)"
     cmd=${cmd/gcheckmax8thr/check_${backend}} # hack: run cuda/hip check with tput fastest settings
-    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/}
+    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl${helinl}_hrd0\/}
     nblk=$(getgridmax | cut -d ' ' -f1)
     nthr=$(getgridmax | cut -d ' ' -f2)
     while [ $nthr -gt 8 ]; do (( nthr = nthr / 2 )); (( nblk = nblk * 2 )); done
@@ -351,14 +368,14 @@ function runcheck()
   elif [ "${cmd/gcheckmax}" != "$cmd" ]; then
     txt="GCHECK(MAX)"
     cmd=${cmd/gcheckmax/check_${backend}} # hack: run cuda/hip check with tput fastest settings
-    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/}
+    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl${helinl}_hrd0\/}
     nblk=$(getgridmax | cut -d ' ' -f1)
     nthr=$(getgridmax | cut -d ' ' -f2)
     (( nevt = nblk*nthr ))
   elif [ "${cmd/gcheck}" != "$cmd" ]; then
     txt="GCHECK($NLOOP)"
     cmd=${cmd/gcheck/check_${backend}}
-    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/}
+    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl${helinl}_hrd0\/}
     nthr=32
     (( nblk = NLOOP/nthr )) || true # integer division (NB: bash double parenthesis fails if the result is 0)
     (( nloop2 = nblk*nthr )) || true
@@ -367,7 +384,7 @@ function runcheck()
   elif [ "${cmd/check}" != "$cmd" ]; then
     txt="CHECK($NLOOP)"
     cmd=${cmd/check/check_cpp}
-    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/}
+    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl${helinl}_hrd0\/}
     nthr=32
     (( nblk = NLOOP/nthr )) || true # integer division (NB: bash double parenthesis fails if the result is 0)
     (( nloop2 = nblk*nthr )) || true
@@ -395,12 +412,12 @@ function runmadevent()
   cmd=$1
   if [ "${cmd/madevent_cpp}" != "$cmd" ]; then
     tmpin=$(getinputfile -cpp)
-    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/}
+    cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl${helinl}_hrd0\/}
   elif [ "${cmd/madevent_cuda}" != "$cmd" ]; then
-    cmd=${cmd/.\//.\/build.cuda_${fptype}_inl0_hrd0\/}
+    cmd=${cmd/.\//.\/build.cuda_${fptype}_inl${helinl}_hrd0\/}
     tmpin=$(getinputfile -cuda)
   elif [ "${cmd/madevent_hip}" != "$cmd" ]; then
-    cmd=${cmd/.\//.\/build.hip_${fptype}_inl0_hrd0\/}
+    cmd=${cmd/.\//.\/build.hip_${fptype}_inl${helinl}_hrd0\/}
     tmpin=$(getinputfile -hip)
   else # assume this is madevent_fortran (do not check)
     tmpin=$(getinputfile -fortran)
diff --git a/epochX/cudacpp/tmad/teeMadX.sh b/epochX/cudacpp/tmad/teeMadX.sh
index 8393e8572d..36aa22a9b4 100755
--- a/epochX/cudacpp/tmad/teeMadX.sh
+++ b/epochX/cudacpp/tmad/teeMadX.sh
@@ -10,7 +10,7 @@ cd $scrdir
 
 function usage()
 {
-  echo "Usage: $0 <processes [-eemumu][-ggtt][-ggttg][-ggttgg][-ggttggg][-gguu][-gqttq][-heftggbb][-susyggtt][-susyggt1t1][-smeftggtttt]> [-flt|-fltonly|-mix|-mixonly] [-makeonly] [-makeclean] [-rmrdat] [+10x] [-checkonly]" > /dev/stderr
+  echo "Usage: $0 <processes [-eemumu][-ggtt][-ggttg][-ggttgg][-ggttggg][-gguu][-gqttq][-heftggbb][-susyggtt][-susyggt1t1][-smeftggtttt]> [-flt|-fltonly|-mix|-mixonly] [-inl|-inlonly|-inlL|-inlLonly] [-makeonly] [-makeclean] [-rmrdat] [+10x] [-checkonly]" > /dev/stderr
   exit 1
 }
 
@@ -29,7 +29,7 @@ smeftggtttt=
 
 suffs="mad"
 fptypes="d"
-helinls="0"
+helinls="" # set default later
 hrdcods="0"
 
 steps="make test"
@@ -92,12 +92,18 @@ for arg in $*; do
   elif [ "$arg" == "-mixonly" ]; then
     if [ "${fptypes}" != "d" ] && [ "${fptypes}" != "m" ]; then echo "ERROR! Options -flt, -fltonly, -mix and -mixonly are incompatible"; usage; fi
     fptypes="m"
-  #elif [ "$arg" == "-inl" ]; then
-  #  if [ "${helinls}" == "1" ]; then echo "ERROR! Options -inl and -inlonly are incompatible"; usage; fi
-  #  helinls="0 1"
-  #elif [ "$arg" == "-inlonly" ]; then
-  #  if [ "${helinls}" == "0 1" ]; then echo "ERROR! Options -inl and -inlonly are incompatible"; usage; fi
-  #  helinls="1"
+  elif [ "$arg" == "-inl" ]; then
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
+    helinls="0 1"
+  elif [ "$arg" == "-inlonly" ]; then
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
+    helinls="1"
+  elif [ "$arg" == "-inlL" ]; then
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
+    helinls="0 1 L"
+  elif [ "$arg" == "-inlLonly" ]; then
+    if [ "${helinls}" != "" ]; then echo "ERROR! Options -inl, -inlonly, -inlL, -inlLonly are incompatible (and can be specified only once)"; usage; fi
+    helinls="L"
   #elif [ "$arg" == "-hrd" ]; then
   #  if [ "${hrdcods}" == "1" ]; then echo "ERROR! Options -hrd and -hrdonly are incompatible"; usage; fi
   #  hrdcods="0 1"
@@ -129,6 +135,9 @@ for arg in $*; do
   fi  
 done
 
+# Set defaults a posteriori
+if [ "${helinls}" == "" ]; then helinls="0"; fi
+
 # Check that at least one process has been selected
 if [ "${procs}" == "" ]; then usage; fi
 
@@ -141,7 +150,7 @@ for step in $steps; do
       for fptype in $fptypes; do
         flt=; if [ "${fptype}" == "f" ]; then flt=" -fltonly"; elif [ "${fptype}" == "m" ]; then flt=" -mixonly"; fi
         for helinl in $helinls; do
-          inl=; if [ "${helinl}" == "1" ]; then inl=" -inlonly"; fi
+          inl=; if [ "${helinl}" == "1" ]; then inl=" -inlonly"; elif [ "${helinl}" == "L" ]; then inl=" -inlLonly"; fi
           for hrdcod in $hrdcods; do
             hrd=; if [ "${hrdcod}" == "1" ]; then hrd=" -hrdonly"; fi
             args="${proc}${flt}${inl}${hrd}${deb}${rmrdat}${add10x}${checkonly} ${dlp}"

From 348ebfdafb51583b2dc793c0e2eb293f5e4582e2 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 22:05:24 +0200
Subject: [PATCH 23/50] [helas] add HelAmps.cc to all regenerated processes

git add *.mad/*/HelAmps.cc *.mad/*/*/HelAmps.cc *.sa/*/HelAmps.cc *.sa/*/*/HelAmps.cc
---
 .../ee_mumu.mad/SubProcesses/HelAmps.cc       | 181 +++++++++++++
 .../SubProcesses/P1_epem_mupmum/HelAmps.cc    |   1 +
 .../ee_mumu.sa/SubProcesses/HelAmps.cc        | 181 +++++++++++++
 .../P1_Sigma_sm_epem_mupmum/HelAmps.cc        |   1 +
 .../cudacpp/gg_tt.sa/SubProcesses/HelAmps.cc  | 124 +++++++++
 .../P1_Sigma_sm_gg_ttx/HelAmps.cc             |   1 +
 .../gg_tt01g.mad/SubProcesses/HelAmps.cc      | 201 ++++++++++++++
 .../SubProcesses/P1_gg_ttx/HelAmps.cc         |   1 +
 .../SubProcesses/P2_gg_ttxg/HelAmps.cc        |   1 +
 .../gg_ttg.mad/SubProcesses/HelAmps.cc        | 201 ++++++++++++++
 .../SubProcesses/P1_gg_ttxg/HelAmps.cc        |   1 +
 .../cudacpp/gg_ttg.sa/SubProcesses/HelAmps.cc | 201 ++++++++++++++
 .../P1_Sigma_sm_gg_ttxg/HelAmps.cc            |   1 +
 .../gg_ttgg.mad/SubProcesses/HelAmps.cc       | 246 ++++++++++++++++++
 .../SubProcesses/P1_gg_ttxgg/HelAmps.cc       |   1 +
 .../gg_ttgg.sa/SubProcesses/HelAmps.cc        | 246 ++++++++++++++++++
 .../P1_Sigma_sm_gg_ttxgg/HelAmps.cc           |   1 +
 .../gg_ttggg.mad/SubProcesses/HelAmps.cc      | 246 ++++++++++++++++++
 .../SubProcesses/P1_gg_ttxggg/HelAmps.cc      |   1 +
 .../gg_ttggg.sa/SubProcesses/HelAmps.cc       | 246 ++++++++++++++++++
 .../P1_Sigma_sm_gg_ttxggg/HelAmps.cc          |   1 +
 .../gq_ttq.mad/SubProcesses/HelAmps.cc        | 138 ++++++++++
 .../SubProcesses/P1_gu_ttxu/HelAmps.cc        |   1 +
 .../SubProcesses/P1_gux_ttxux/HelAmps.cc      |   1 +
 .../cudacpp/gq_ttq.sa/SubProcesses/HelAmps.cc | 138 ++++++++++
 .../P1_Sigma_sm_gu_ttxu/HelAmps.cc            |   1 +
 .../P1_Sigma_sm_gux_ttxux/HelAmps.cc          |   1 +
 .../heft_gg_bb.mad/SubProcesses/HelAmps.cc    | 153 +++++++++++
 .../SubProcesses/P1_gg_bbx/HelAmps.cc         |   1 +
 .../heft_gg_bb.sa/SubProcesses/HelAmps.cc     | 153 +++++++++++
 .../P1_Sigma_heft_gg_bbx/HelAmps.cc           |   1 +
 .../pp_tt012j.mad/SubProcesses/HelAmps.cc     | 246 ++++++++++++++++++
 .../SubProcesses/P0_gg_ttx/HelAmps.cc         |   1 +
 .../SubProcesses/P0_uux_ttx/HelAmps.cc        |   1 +
 .../SubProcesses/P1_gg_ttxg/HelAmps.cc        |   1 +
 .../SubProcesses/P1_gu_ttxu/HelAmps.cc        |   1 +
 .../SubProcesses/P1_gux_ttxux/HelAmps.cc      |   1 +
 .../SubProcesses/P1_uux_ttxg/HelAmps.cc       |   1 +
 .../SubProcesses/P2_gg_ttxgg/HelAmps.cc       |   1 +
 .../SubProcesses/P2_gg_ttxuux/HelAmps.cc      |   1 +
 .../SubProcesses/P2_gu_ttxgu/HelAmps.cc       |   1 +
 .../SubProcesses/P2_gux_ttxgux/HelAmps.cc     |   1 +
 .../SubProcesses/P2_uc_ttxuc/HelAmps.cc       |   1 +
 .../SubProcesses/P2_ucx_ttxucx/HelAmps.cc     |   1 +
 .../SubProcesses/P2_uu_ttxuu/HelAmps.cc       |   1 +
 .../SubProcesses/P2_uux_ttxccx/HelAmps.cc     |   1 +
 .../SubProcesses/P2_uux_ttxgg/HelAmps.cc      |   1 +
 .../SubProcesses/P2_uux_ttxuux/HelAmps.cc     |   1 +
 .../SubProcesses/P2_uxcx_ttxuxcx/HelAmps.cc   |   1 +
 .../SubProcesses/P2_uxux_ttxuxux/HelAmps.cc   |   1 +
 .../smeft_gg_tttt.mad/SubProcesses/HelAmps.cc | 198 ++++++++++++++
 .../SubProcesses/P1_gg_ttxttx/HelAmps.cc      |   1 +
 .../smeft_gg_tttt.sa/SubProcesses/HelAmps.cc  | 198 ++++++++++++++
 .../HelAmps.cc                                |   1 +
 .../susy_gg_t1t1.mad/SubProcesses/HelAmps.cc  | 139 ++++++++++
 .../SubProcesses/P1_gg_t1t1x/HelAmps.cc       |   1 +
 .../susy_gg_t1t1.sa/SubProcesses/HelAmps.cc   | 139 ++++++++++
 .../P1_Sigma_MSSM_SLHA2_gg_t1t1x/HelAmps.cc   |   1 +
 .../susy_gg_tt.mad/SubProcesses/HelAmps.cc    | 124 +++++++++
 .../SubProcesses/P1_gg_ttx/HelAmps.cc         |   1 +
 .../susy_gg_tt.sa/SubProcesses/HelAmps.cc     | 124 +++++++++
 .../P1_Sigma_MSSM_SLHA2_gg_ttx/HelAmps.cc     |   1 +
 62 files changed, 3864 insertions(+)
 create mode 100644 epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/HelAmps.cc
 create mode 100644 epochX/cudacpp/ee_mumu.sa/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/HelAmps.cc
 create mode 100644 epochX/cudacpp/gg_tt.sa/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/HelAmps.cc
 create mode 100644 epochX/cudacpp/gg_tt01g.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
 create mode 120000 epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/HelAmps.cc
 create mode 100644 epochX/cudacpp/gg_ttg.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/HelAmps.cc
 create mode 100644 epochX/cudacpp/gg_ttg.sa/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/HelAmps.cc
 create mode 100644 epochX/cudacpp/gg_ttgg.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/HelAmps.cc
 create mode 100644 epochX/cudacpp/gg_ttgg.sa/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/HelAmps.cc
 create mode 100644 epochX/cudacpp/gg_ttggg.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/HelAmps.cc
 create mode 100644 epochX/cudacpp/gg_ttggg.sa/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/HelAmps.cc
 create mode 100644 epochX/cudacpp/gq_ttq.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/HelAmps.cc
 create mode 120000 epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/HelAmps.cc
 create mode 100644 epochX/cudacpp/gq_ttq.sa/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/HelAmps.cc
 create mode 120000 epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/HelAmps.cc
 create mode 100644 epochX/cudacpp/heft_gg_bb.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/HelAmps.cc
 create mode 100644 epochX/cudacpp/heft_gg_bb.sa/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/HelAmps.cc
 create mode 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/HelAmps.cc
 create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/HelAmps.cc
 create mode 100644 epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/HelAmps.cc
 create mode 100644 epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/HelAmps.cc
 create mode 100644 epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/HelAmps.cc
 create mode 100644 epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/HelAmps.cc
 create mode 100644 epochX/cudacpp/susy_gg_tt.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
 create mode 100644 epochX/cudacpp/susy_gg_tt.sa/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/HelAmps.cc

diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..43c887914f
--- /dev/null
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,181 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events [unused in this file]
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events [unused in this file]
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [unused in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events [unused in this file]
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events [unused in this file]
+#endif
+#else
+  using namespace ::mg5amcCpu; // NOTE(review): looks redundant, this branch is already inside namespace mg5amcCpu
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events [unused in this file]
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events [unused in this file]
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [unused in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events [unused in this file]
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events [unused in this file]
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1P0_3<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV2_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV2_3<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV2_3( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allV3[] )
+  {
+    return FFV2_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV4_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV4_3<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV4_3( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allV3[] )
+  {
+    return FFV4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV2_4_0( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   fptype allvertexes[] )
+  {
+    return FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV2_4_3<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV2_4_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    return FFV2_4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/HelAmps.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..43c887914f
--- /dev/null
+++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/HelAmps.cc
@@ -0,0 +1,181 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events [unused in this file]
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events [unused in this file]
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [unused in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events [unused in this file]
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events [unused in this file]
+#endif
+#else
+  using namespace ::mg5amcCpu; // NOTE(review): looks redundant, this branch is already inside namespace mg5amcCpu
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events [unused in this file]
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events [unused in this file]
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [unused in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events [unused in this file]
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events [unused in this file]
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1P0_3<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV2_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV2_3<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV2_3( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allV3[] )
+  {
+    return FFV2_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV4_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV4_3<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV4_3( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allV3[] )
+  {
+    return FFV4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV2_4_0( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   fptype allvertexes[] )
+  {
+    return FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV2_4_3<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV2_4_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    return FFV2_4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/HelAmps.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..79486e92f0
--- /dev/null
+++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/HelAmps.cc
@@ -0,0 +1,124 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events [unused in this file]
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events [unused in this file]
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [unused in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events [unused in this file]
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events [unused in this file]
+#endif
+#else
+  using namespace ::mg5amcCpu; // NOTE(review): looks redundant, this branch is already inside namespace mg5amcCpu
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events [unused in this file]
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events [unused in this file]
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [unused in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events [unused in this file]
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events [unused in this file]
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVV1P0_1<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_1<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_2<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/HelAmps.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..fbad4a8555
--- /dev/null
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,201 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events [unused in this file]
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events [unused in this file]
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [unused in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events [unused in this file]
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events [unused in this file]
+#endif
+#else
+  using namespace ::mg5amcCpu; // NOTE(review): looks redundant, this branch is already inside namespace mg5amcCpu
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events [unused in this file]
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events [unused in this file]
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [unused in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events [unused in this file]
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events [unused in this file]
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVV1P0_1<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_1<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_2<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1P0_3<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV1P0_1<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV3P0_1<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV4P0_1<W_ACCESS, CD_ACCESS>: computes the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/HelAmps.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/HelAmps.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..fbad4a8555
--- /dev/null
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,201 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else // !MGONGPUCPP_GPUIMPL
+namespace mg5amcCpu
+#endif // MGONGPUCPP_GPUIMPL
+{ // Non-template linker_* entry points: each forwards its arguments to the same-named template, instantiated with the access aliases below
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#else // !MGONGPUCPP_GPUIMPL
+  using namespace ::mg5amcCpu; // NOTE(review): this names the enclosing namespace itself and looks redundant - confirm
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#endif // MGONGPUCPP_GPUIMPL
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVV1P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1_2<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1P0_3<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV1P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV3P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV4P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/HelAmps.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..fbad4a8555
--- /dev/null
+++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/HelAmps.cc
@@ -0,0 +1,201 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else // !MGONGPUCPP_GPUIMPL
+namespace mg5amcCpu
+#endif // MGONGPUCPP_GPUIMPL
+{ // Non-template linker_* entry points: each forwards its arguments to the same-named template, instantiated with the access aliases below
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#else // !MGONGPUCPP_GPUIMPL
+  using namespace ::mg5amcCpu; // NOTE(review): this names the enclosing namespace itself and looks redundant - confirm
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#endif // MGONGPUCPP_GPUIMPL
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVV1P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1_2<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1P0_3<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV1P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV3P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV4P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/HelAmps.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..845cf9fd87
--- /dev/null
+++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,246 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else // !MGONGPUCPP_GPUIMPL
+namespace mg5amcCpu
+#endif // MGONGPUCPP_GPUIMPL
+{ // Non-template linker_* entry points: each forwards its arguments to the same-named template, instantiated with the access aliases below
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#else // !MGONGPUCPP_GPUIMPL
+  using namespace ::mg5amcCpu; // NOTE(review): this names the enclosing namespace itself and looks redundant - confirm
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#endif // MGONGPUCPP_GPUIMPL
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVV1P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1_2<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for FFV1P0_3<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV1P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV3P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    return VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper for VVVV4P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/HelAmps.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..845cf9fd87
--- /dev/null
+++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/HelAmps.cc
@@ -0,0 +1,246 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVV1P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1_1<W_ACCESS, CD_ACCESS>: output wavefunction 'F1[6]' from input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1_2<W_ACCESS, CD_ACCESS>: output wavefunction 'F2[6]' from input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1P0_3<W_ACCESS, CD_ACCESS>: output wavefunction 'V3[6]' from input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV1P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV3P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV4P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/HelAmps.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..845cf9fd87
--- /dev/null
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,246 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVV1P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1_1<W_ACCESS, CD_ACCESS>: output wavefunction 'F1[6]' from input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1_2<W_ACCESS, CD_ACCESS>: output wavefunction 'F2[6]' from input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1P0_3<W_ACCESS, CD_ACCESS>: output wavefunction 'V3[6]' from input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV1P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV3P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV4P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/HelAmps.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..845cf9fd87
--- /dev/null
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/HelAmps.cc
@@ -0,0 +1,246 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVV1P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1_1<W_ACCESS, CD_ACCESS>: output wavefunction 'F1[6]' from input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1_2<W_ACCESS, CD_ACCESS>: output wavefunction 'F2[6]' from input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around FFV1P0_3<W_ACCESS, CD_ACCESS>: output wavefunction 'V3[6]' from input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV1P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV3P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper around VVVV4P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/HelAmps.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..b1b8a27d42
--- /dev/null
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,138 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events [not referenced in this file]
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events [not referenced in this file]
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [not referenced in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events [not referenced in this file]
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events [not referenced in this file]
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events [not referenced in this file]
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events [not referenced in this file]
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [not referenced in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events [not referenced in this file]
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events [not referenced in this file]
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>, which computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_1<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_2<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1P0_3<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>, which computes the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/HelAmps.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/HelAmps.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..b1b8a27d42
--- /dev/null
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/HelAmps.cc
@@ -0,0 +1,138 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events [not referenced in this file]
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events [not referenced in this file]
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [not referenced in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events [not referenced in this file]
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events [not referenced in this file]
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events [not referenced in this file]
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events [not referenced in this file]
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [not referenced in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events [not referenced in this file]
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events [not referenced in this file]
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>, which computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_1<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_2<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1P0_3<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>, which computes the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/HelAmps.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/HelAmps.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..649ad4ab18
--- /dev/null
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,153 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events [not referenced in this file]
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events [not referenced in this file]
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [not referenced in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events [not referenced in this file]
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events [not referenced in this file]
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events [not referenced in this file]
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events [not referenced in this file]
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [not referenced in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events [not referenced in this file]
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events [not referenced in this file]
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to VVS3_3<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6]
+  __device__ void
+  linker_VVS3_3( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allS3[] )
+  {
+    return VVS3_3<W_ACCESS, CD_ACCESS>( allV1, allV2, allCOUP, Ccoeff, M3, W3, allS3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to VVV1P0_1<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>, which computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_1<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_2<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFS2_0<W_ACCESS, A_ACCESS, CD_ACCESS>, which computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6]
+  __device__ void
+  linker_FFS2_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFS2_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allS3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/HelAmps.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..649ad4ab18
--- /dev/null
+++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/HelAmps.cc
@@ -0,0 +1,153 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events [not referenced in this file]
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events [not referenced in this file]
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [not referenced in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events [not referenced in this file]
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events [not referenced in this file]
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events [not referenced in this file]
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events [not referenced in this file]
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event [not referenced in this file]
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events [not referenced in this file]
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events [not referenced in this file]
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to VVS3_3<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6]
+  __device__ void
+  linker_VVS3_3( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allS3[] )
+  {
+    return VVS3_3<W_ACCESS, CD_ACCESS>( allV1, allV2, allCOUP, Ccoeff, M3, W3, allS3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to VVV1P0_1<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>, which computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_1<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFV1_2<W_ACCESS, CD_ACCESS>, which computes the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper: forwards all arguments unchanged to FFS2_0<W_ACCESS, A_ACCESS, CD_ACCESS>, which computes the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6]
+  __device__ void
+  linker_FFS2_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFS2_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allS3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/HelAmps.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..845cf9fd87
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,246 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation class depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this class is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+  // Memory access aliases (Device* if MGONGPUCPP_GPUIMPL is defined, Host* otherwise); the wrappers below only use W_ACCESS, A_ACCESS, CD_ACCESS
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV1_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVV1P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_1<W_ACCESS, CD_ACCESS>: output wavefunction 'F1[6]' from input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_2<W_ACCESS, CD_ACCESS>: output wavefunction 'F2[6]' from input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1P0_3<W_ACCESS, CD_ACCESS>: output wavefunction 'V3[6]' from input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV1P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV3P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV3P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV4P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV4P0_1( const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allV4[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allV1[] )
+  {
+    VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..b1e8df624a
--- /dev/null
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,198 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation class depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this class is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+  // Memory access aliases (Device* if MGONGPUCPP_GPUIMPL is defined, Host* otherwise); the wrappers below only use W_ACCESS, A_ACCESS, CD_ACCESS
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV5_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVV5P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV5P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    VVV5P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_1<W_ACCESS, CD_ACCESS>: output wavefunction 'F1[6]' from input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_2<W_ACCESS, CD_ACCESS>: output wavefunction 'F2[6]' from input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1P0_3<W_ACCESS, CD_ACCESS>: output wavefunction 'V3[6]' from input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV9_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV10_0( const fptype allV1[],
+                   const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allV4[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   fptype allvertexes[] )
+  {
+    VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/HelAmps.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..b1e8df624a
--- /dev/null
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/HelAmps.cc
@@ -0,0 +1,198 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation class depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this class is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+  // Memory access aliases (Device* if MGONGPUCPP_GPUIMPL is defined, Host* otherwise); the wrappers below only use W_ACCESS, A_ACCESS, CD_ACCESS
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6]
+  __device__ void
+  linker_VVV5_0( const fptype allV1[],
+                 const fptype allV2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVV5P0_1<W_ACCESS, CD_ACCESS>: output wavefunction 'V1[6]' from input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV5P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    VVV5P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_1<W_ACCESS, CD_ACCESS>: output wavefunction 'F1[6]' from input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1_2<W_ACCESS, CD_ACCESS>: output wavefunction 'F2[6]' from input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to FFV1P0_3<W_ACCESS, CD_ACCESS>: output wavefunction 'V3[6]' from input wavefunctions F1[6], F2[6]
+  __device__ void
+  linker_FFV1P0_3( const fptype allF1[],
+                   const fptype allF2[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M3,
+                   const fptype W3,
+                   fptype allV3[] )
+  {
+    FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV9_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allV3[],
+                  const fptype allV4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template wrapper forwarding to VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>: output amplitude 'vertex' from input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  __device__ void
+  linker_VVVV10_0( const fptype allV1[],
+                   const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allV4[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   fptype allvertexes[] )
+  {
+    VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/HelAmps.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..b984c35d2f
--- /dev/null
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,139 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently symlinked into, and compiled in, each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL // GPU build (inside namespace mg5amcGpu): aliases to the Device* access classes; the wrappers in this file only use W_ACCESS, A_ACCESS and CD_ACCESS
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#else // CPU build (inside namespace mg5amcCpu): aliases to the Host* access classes
+  using namespace ::mg5amcCpu; // NOTE(review): looks redundant, this branch is already inside namespace mg5amcCpu - confirm before removing
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#endif // MGONGPUCPP_GPUIMPL
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VVV1P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6]
+  __device__ void
+  linker_VSS1_0( const fptype allV1[],
+                 const fptype allS2[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allS2, allS3, allCOUP, Ccoeff, allvertexes ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VSS1_2<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6]
+  __device__ void
+  linker_VSS1_2( const fptype allV1[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allS2[] )
+  {
+    return VSS1_2<W_ACCESS, CD_ACCESS>( allV1, allS3, allCOUP, Ccoeff, M2, W2, allS2 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VSS1_3<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6]
+  __device__ void
+  linker_VSS1_3( const fptype allV1[],
+                 const fptype allS2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allS3[] )
+  {
+    return VSS1_3<W_ACCESS, CD_ACCESS>( allV1, allS2, allCOUP, Ccoeff, M3, W3, allS3 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6]
+  __device__ void
+  linker_VVSS1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allS3[],
+                  const fptype allS4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    return VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allS3, allS4, allCOUP, Ccoeff, allvertexes ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/HelAmps.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..b984c35d2f
--- /dev/null
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/HelAmps.cc
@@ -0,0 +1,139 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently symlinked into, and compiled in, each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL // GPU build (inside namespace mg5amcGpu): aliases to the Device* access classes; the wrappers in this file only use W_ACCESS, A_ACCESS and CD_ACCESS
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#else // CPU build (inside namespace mg5amcCpu): aliases to the Host* access classes
+  using namespace ::mg5amcCpu; // NOTE(review): looks redundant, this branch is already inside namespace mg5amcCpu - confirm before removing
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#endif // MGONGPUCPP_GPUIMPL
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VVV1P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6]
+  __device__ void
+  linker_VSS1_0( const fptype allV1[],
+                 const fptype allS2[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allS2, allS3, allCOUP, Ccoeff, allvertexes ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VSS1_2<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6]
+  __device__ void
+  linker_VSS1_2( const fptype allV1[],
+                 const fptype allS3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allS2[] )
+  {
+    return VSS1_2<W_ACCESS, CD_ACCESS>( allV1, allS3, allCOUP, Ccoeff, M2, W2, allS2 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VSS1_3<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6]
+  __device__ void
+  linker_VSS1_3( const fptype allV1[],
+                 const fptype allS2[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M3,
+                 const fptype W3,
+                 fptype allS3[] )
+  {
+    return VSS1_3<W_ACCESS, CD_ACCESS>( allV1, allS2, allCOUP, Ccoeff, M3, W3, allS3 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6]
+  __device__ void
+  linker_VVSS1_0( const fptype allV1[],
+                  const fptype allV2[],
+                  const fptype allS3[],
+                  const fptype allS4[],
+                  const fptype allCOUP[],
+                  const double Ccoeff,
+                  fptype allvertexes[] )
+  {
+    return VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allS3, allS4, allCOUP, Ccoeff, allvertexes ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/HelAmps.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..79486e92f0
--- /dev/null
+++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,124 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently symlinked into, and compiled in, each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL // GPU build (inside namespace mg5amcGpu): aliases to the Device* access classes; the wrappers in this file only use W_ACCESS, A_ACCESS and CD_ACCESS
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#else // CPU build (inside namespace mg5amcCpu): aliases to the Host* access classes
+  using namespace ::mg5amcCpu; // NOTE(review): looks redundant, this branch is already inside namespace mg5amcCpu - confirm before removing
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#endif // MGONGPUCPP_GPUIMPL
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VVV1P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates FFV1_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates FFV1_2<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..79486e92f0
--- /dev/null
+++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/HelAmps.cc
@@ -0,0 +1,124 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation file depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this file is presently symlinked into, and compiled in, each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL // GPU build (inside namespace mg5amcGpu): aliases to the Device* access classes; the wrappers in this file only use W_ACCESS, A_ACCESS and CD_ACCESS
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#else // CPU build (inside namespace mg5amcCpu): aliases to the Host* access classes
+  using namespace ::mg5amcCpu; // NOTE(review): looks redundant, this branch is already inside namespace mg5amcCpu - confirm before removing
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif // MGONGPU_SUPPORTS_MULTICHANNEL
+#endif // MGONGPUCPP_GPUIMPL
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates VVV1P0_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  __device__ void
+  linker_VVV1P0_1( const fptype allV2[],
+                   const fptype allV3[],
+                   const fptype allCOUP[],
+                   const double Ccoeff,
+                   const fptype M1,
+                   const fptype W1,
+                   fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>: compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  __device__ void
+  linker_FFV1_0( const fptype allF1[],
+                 const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates FFV1_1<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  __device__ void
+  linker_FFV1_1( const fptype allF2[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M1,
+                 const fptype W1,
+                 fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Non-template __device__ wrapper that instantiates FFV1_2<W_ACCESS, CD_ACCESS>: compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  __device__ void
+  linker_FFV1_2( const fptype allF1[],
+                 const fptype allV3[],
+                 const fptype allCOUP[],
+                 const double Ccoeff,
+                 const fptype M2,
+                 const fptype W2,
+                 fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 ); // forwards all arguments unchanged ('return' of a void call)
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/HelAmps.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file

From 3ecf99e82d13fb40b9966793567625324e156bd5 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 22:06:24 +0200
Subject: [PATCH 24/50] [helas] aborted tput test of ggttggg with all helinl
 values - inl1 build failed?

./tput/teeThroughputX.sh -ggttggg -makej -makeclean -inlL

ccache /usr/local/cuda-12.0/bin/nvcc  -I. -I../../src  -Xcompiler -O3 -gencode arch=compute_70,code=compute_70 -gencode arch=compute_70,code=sm_70 -lineinfo -use_fast_math -I/usr/local/cuda-12.0/include/ -DUSE_NVTX  -std=c++17  -ccbin /usr/lib64/ccache/g++ -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE -DMGONGPU_INLINE_HELAMPS -Xcompiler -fPIC -c -x cu CPPProcess.cc -o build.cuda_d_inl1_hrd0/CPPProcess_cuda.o
nvcc error   : 'ptxas' died due to signal 9 (Kill signal)
make[2]: *** [cudacpp.mk:754: build.cuda_d_inl1_hrd0/CPPProcess_cuda.o] Error 9
make[2]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
make[1]: *** [makefile:142: build.cuda_d_inl1_hrd0/.cudacpplibs] Error 2
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
make: *** [makefile:282: bldcuda] Error 2
make: *** Waiting for unfinished jobs....
---
 .../log_ggttggg_mad_d_inl0_hrd0.txt           | 242 +-----------------
 1 file changed, 3 insertions(+), 239 deletions(-)

diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
index 69ee294d0a..c42fc4fd79 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
@@ -1,240 +1,4 @@
 
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
-BACKEND=cpp512y (was cppauto)
-OMPFLAGS=
-FPTYPE='d'
-HELINL='0'
-HRDCOD='0'
-HASCURAND=hasCurand
-HASHIPRAND=hasNoHiprand
-Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
-make: Nothing to be done for 'gtestlibs'.
-
-make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-DATE: 2024-08-08_19:59:44
-
-On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
-=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP=
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
-Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.065566e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.065949e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.066073e+02                 )  sec^-1
-MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     2.441334 sec
-INFO: No Floating Point Exceptions have been reported
-     8,270,107,004      cycles                           #    2.987 GHz                    
-    17,474,421,900      instructions                     #    2.11  insn per cycle         
-       2.824451613 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
-==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP=
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
-Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.242290e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.244758e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.245006e+03                 )  sec^-1
-MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     4.011109 sec
-INFO: No Floating Point Exceptions have been reported
-    12,991,708,385      cycles                           #    2.995 GHz                    
-    30,957,069,887      instructions                     #    2.38  insn per cycle         
-       4.393935391 seconds time elapsed
--------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
-INFO: No Floating Point Exceptions have been reported
--------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
-Avg ME (C++/GPU)   = 9.872263e-03
-Avg ME (F77/GPU)   = 9.8722595284406640E-003
-Relative difference = 3.5164777671934515e-07
-OK (relative difference <= 5E-3)
-=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe
-=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
-Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.391032e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.391286e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.391286e+01                 )  sec^-1
-MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     6.292298 sec
-INFO: No Floating Point Exceptions have been reported
-    18,909,993,943      cycles                           #    3.004 GHz                    
-    53,904,007,557      instructions                     #    2.85  insn per cycle         
-       6.296177339 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:32425) (avx2:    0) (512y:    0) (512z:    0)
--------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
-INFO: No Floating Point Exceptions have been reported
--------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
-Avg ME (C++/C++)    = 9.872263e-03
-Avg ME (F77/C++)    = 9.8722595285514851E-003
-Relative difference = 3.5163655122073967e-07
-OK (relative difference <= 5E-3)
-=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
-Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.592148e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.592238e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.592238e+02                 )  sec^-1
-MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     3.319128 sec
-INFO: No Floating Point Exceptions have been reported
-     9,961,985,828      cycles                           #    2.999 GHz                    
-    27,151,879,178      instructions                     #    2.73  insn per cycle         
-       3.323113942 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:96499) (avx2:    0) (512y:    0) (512z:    0)
--------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
-INFO: No Floating Point Exceptions have been reported
--------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
-Avg ME (C++/C++)    = 9.872263e-03
-Avg ME (F77/C++)    = 9.8722595285514851E-003
-Relative difference = 3.5163655122073967e-07
-OK (relative difference <= 5E-3)
-=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
-Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.420642e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.421042e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.421042e+02                 )  sec^-1
-MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.544804 sec
-INFO: No Floating Point Exceptions have been reported
-     4,330,644,690      cycles                           #    2.797 GHz                    
-     9,589,874,862      instructions                     #    2.21  insn per cycle         
-       1.548809848 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84971) (512y:    0) (512z:    0)
--------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
-INFO: No Floating Point Exceptions have been reported
--------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
-Avg ME (C++/C++)    = 9.872263e-03
-Avg ME (F77/C++)    = 9.8722595285411531E-003
-Relative difference = 3.516375977906115e-07
-OK (relative difference <= 5E-3)
-=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
-Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.965040e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.965659e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.965659e+02                 )  sec^-1
-MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.333170 sec
-INFO: No Floating Point Exceptions have been reported
-     3,730,547,974      cycles                           #    2.792 GHz                    
-     8,513,850,652      instructions                     #    2.28  insn per cycle         
-       1.336769828 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80619) (512y:   89) (512z:    0)
--------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
-INFO: No Floating Point Exceptions have been reported
--------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
-Avg ME (C++/C++)    = 9.872263e-03
-Avg ME (F77/C++)    = 9.8722595285411531E-003
-Relative difference = 3.516375977906115e-07
-OK (relative difference <= 5E-3)
-=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
-Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.618586e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.619123e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.619123e+02                 )  sec^-1
-MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.462675 sec
-INFO: No Floating Point Exceptions have been reported
-     2,695,334,241      cycles                           #    1.839 GHz                    
-     4,280,276,658      instructions                     #    1.59  insn per cycle         
-       1.466339679 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2852) (512y:  103) (512z:79119)
--------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
-INFO: No Floating Point Exceptions have been reported
--------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
-Avg ME (C++/C++)    = 9.872263e-03
-Avg ME (F77/C++)    = 9.8722595285411531E-003
-Relative difference = 3.516375977906115e-07
-OK (relative difference <= 5E-3)
-=========================================================================
-
-TEST COMPLETED
+------------------------------------------------
+Preliminary build completed in 0d 00h 13m 11s
+------------------------------------------------

From de8d452a4e9d6e71d300f91518d529fbe1ba01c4 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 22:11:29 +0200
Subject: [PATCH 25/50] [helas] rerun the ggttggg tput test only in inl0 mode -
 note that the build time is from cache

./tput/teeThroughputX.sh -ggttggg -makej -makeclean
---
 .../log_ggttggg_mad_d_inl0_hrd0.txt           | 242 +++++++++++++++++-
 1 file changed, 241 insertions(+), 1 deletion(-)

diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
index c42fc4fd79..a026555170 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
@@ -1,4 +1,244 @@
 
 ------------------------------------------------
-Preliminary build completed in 0d 00h 13m 11s
+Preliminary build completed in 0d 00h 00m 55s
 ------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+DATE: 2024-08-28_22:09:17
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 4.063038e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.063437e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.063626e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     2.552546 sec
+INFO: No Floating Point Exceptions have been reported
+     7,969,059,552      cycles                           #    2.893 GHz                    
+    17,401,037,642      instructions                     #    2.18  insn per cycle         
+       2.954791685 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 9.259402e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.261632e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.261912e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
+TOTAL       :     3.997932 sec
+INFO: No Floating Point Exceptions have been reported
+    12,585,047,715      cycles                           #    2.912 GHz                    
+    29,739,266,768      instructions                     #    2.36  insn per cycle         
+       4.379850043 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 9.872263e-03
+Avg ME (F77/GPU)   = 9.8722595284406640E-003
+Relative difference = 3.5164777671934515e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 8.083869e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.084097e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.084097e+01                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     6.536770 sec
+INFO: No Floating Point Exceptions have been reported
+    18,925,108,902      cycles                           #    2.894 GHz                    
+    53,904,723,767      instructions                     #    2.85  insn per cycle         
+       6.540767069 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:32425) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595285514851E-003
+Relative difference = 3.5163655122073967e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.537822e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.537916e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.537916e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     3.434365 sec
+INFO: No Floating Point Exceptions have been reported
+     9,964,139,970      cycles                           #    2.899 GHz                    
+    27,150,624,518      instructions                     #    2.72  insn per cycle         
+       3.438404316 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:96499) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595285514851E-003
+Relative difference = 3.5163655122073967e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.336918e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.337312e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.337312e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     1.583780 sec
+INFO: No Floating Point Exceptions have been reported
+     4,293,557,962      cycles                           #    2.705 GHz                    
+     9,590,149,972      instructions                     #    2.23  insn per cycle         
+       1.587790520 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84971) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595285411531E-003
+Relative difference = 3.516375977906115e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.835352e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.836003e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.836003e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     1.378567 sec
+INFO: No Floating Point Exceptions have been reported
+     3,738,350,469      cycles                           #    2.705 GHz                    
+     8,514,195,736      instructions                     #    2.28  insn per cycle         
+       1.382567882 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80619) (512y:   89) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595285411531E-003
+Relative difference = 3.516375977906115e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.399596e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.400143e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.400143e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     1.555703 sec
+INFO: No Floating Point Exceptions have been reported
+     2,693,364,168      cycles                           #    1.728 GHz                    
+     4,280,800,287      instructions                     #    1.59  insn per cycle         
+       1.559932195 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2852) (512y:  103) (512z:79119)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595285411531E-003
+Relative difference = 3.516375977906115e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED

From 93f351b57f512d52a2a36585294349ed5f04d17c Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 22:12:50 +0200
Subject: [PATCH 26/50] [helas] manually fix the build time in the ggttggg tput
 test in inl0 mode (use the time from the previous run, not the one from cache)

./tput/teeThroughputX.sh -ggttggg -makej -makeclean
---
 .../tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
index a026555170..aed0b00b44 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
@@ -1,6 +1,6 @@
 
 ------------------------------------------------
-Preliminary build completed in 0d 00h 00m 55s
+Preliminary build completed in 0d 00h 13m 11s
 ------------------------------------------------
 
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg

From bc897191933a894bd7d141dbfeb2378e42d41d26 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 28 Aug 2024 22:29:21 +0200
Subject: [PATCH 27/50] [helas] first run of the ggttggg tput test in inlL mode
 - build is a factor of 2 faster (c++? cuda?), runtime is 5-10% slower in C++,
 but 5-10% faster in cuda!?

./tput/teeThroughputX.sh -ggttggg -makej -makeclean -inlLonly

diff -u --color tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt  tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
...
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.338149e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.338604e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.338867e+02                 )  sec^-1
-MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     2.242693 sec
-INFO: No Floating Point Exceptions have been reported
-     7,348,976,543      cycles                           #    2.902 GHz
-    16,466,315,526      instructions                     #    2.24  insn per cycle
-       2.591057214 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 1 256 1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.063038e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.063437e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.063626e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     2.552546 sec
+INFO: No Floating Point Exceptions have been reported
+     7,969,059,552      cycles                           #    2.893 GHz
+    17,401,037,642      instructions                     #    2.18  insn per cycle
+       2.954791685 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
...
 =========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.459662e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.460086e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.460086e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.835352e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.836003e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.836003e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.528240 sec
+TOTAL       :     1.378567 sec
 INFO: No Floating Point Exceptions have been reported
-     4,140,408,789      cycles                           #    2.703 GHz
-     9,072,597,595      instructions                     #    2.19  insn per cycle
-       1.532357792 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:94048) (512y:   91) (512z:    0)
+     3,738,350,469      cycles                           #    2.705 GHz
+     8,514,195,736      instructions                     #    2.28  insn per cycle
+       1.382567882 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80619) (512y:   89) (512z:    0)
 -------------------------------------------------------------------------
---
 .../log_ggttggg_mad_d_inlL_hrd0.txt           | 244 ++++++++++++++++++
 1 file changed, 244 insertions(+)
 create mode 100644 epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt

diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
new file mode 100644
index 0000000000..b48700b97f
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 07m 24s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+DATE: 2024-08-28_22:24:33
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 4.338149e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.338604e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.338867e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     2.242693 sec
+INFO: No Floating Point Exceptions have been reported
+     7,348,976,543      cycles                           #    2.902 GHz                    
+    16,466,315,526      instructions                     #    2.24  insn per cycle         
+       2.591057214 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 1 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 8.503743e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.505652e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.505876e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
+TOTAL       :     4.295817 sec
+INFO: No Floating Point Exceptions have been reported
+    13,394,338,134      cycles                           #    2.918 GHz                    
+    29,875,097,657      instructions                     #    2.23  insn per cycle         
+       4.645638907 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 9.872263e-03
+Avg ME (F77/GPU)   = 9.8722595284406658E-003
+Relative difference = 3.516477765436282e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 7.428130e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.428322e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.428322e+01                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     7.109317 sec
+INFO: No Floating Point Exceptions have been reported
+    20,650,921,982      cycles                           #    2.904 GHz                    
+    56,867,588,911      instructions                     #    2.75  insn per cycle         
+       7.113345899 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:40993) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595285514695E-003
+Relative difference = 3.516365528021918e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.439600e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.439678e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.439678e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     3.669607 sec
+INFO: No Floating Point Exceptions have been reported
+    10,661,868,211      cycles                           #    2.903 GHz                    
+    28,187,072,418      instructions                     #    2.64  insn per cycle         
+       3.673685531 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:100554) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595285514678E-003
+Relative difference = 3.5163655297790867e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.055903e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.056245e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.056245e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     1.729784 sec
+INFO: No Floating Point Exceptions have been reported
+     4,640,645,280      cycles                           #    2.678 GHz                    
+     9,974,510,085      instructions                     #    2.15  insn per cycle         
+       1.733938549 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:94205) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595285411548E-003
+Relative difference = 3.5163759761489463e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.459662e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.460086e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.460086e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     1.528240 sec
+INFO: No Floating Point Exceptions have been reported
+     4,140,408,789      cycles                           #    2.703 GHz                    
+     9,072,597,595      instructions                     #    2.19  insn per cycle         
+       1.532357792 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:94048) (512y:   91) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595285411548E-003
+Relative difference = 3.5163759761489463e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.220979e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.221430e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.221430e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     1.642045 sec
+INFO: No Floating Point Exceptions have been reported
+     2,840,456,827      cycles                           #    1.726 GHz                    
+     4,549,279,782      instructions                     #    1.60  insn per cycle         
+       1.646333171 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5078) (512y:  105) (512z:89892)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595285411548E-003
+Relative difference = 3.5163759761489463e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED

From 125b7b49e42578c8c15f54f2e92ddf37cf666fcb Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Thu, 29 Aug 2024 11:22:03 +0200
Subject: [PATCH 28/50] [helas] first run of the ggttggg tmad test in inlL mode
 - runtime is 10-15% slower in both C++ and cuda

diff -u --color tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt

-Executing ' ./build.512y_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
@@ -401,10 +401,10 @@
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  320.6913s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5138s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  316.1312s for    90112 events => throughput is 2.85E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0463s
+ [COUNTERS] PROGRAM TOTAL          :  288.3304s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4909s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  283.7968s for    90112 events => throughput is 3.18E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0426s

-Executing ' ./build.cuda_d_inlL_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
@@ -557,10 +557,10 @@
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :   19.6663s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.9649s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   13.4667s for    90112 events => throughput is 6.69E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    1.2347s
+ [COUNTERS] PROGRAM TOTAL          :   18.0242s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.9891s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   11.9530s for    90112 events => throughput is 7.54E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0821s
---
 .../log_ggttggg_mad_d_inl0_hrd0.txt           | 154 ++---
 .../log_ggttggg_mad_d_inlL_hrd0.txt           | 615 ++++++++++++++++++
 2 files changed, 692 insertions(+), 77 deletions(-)
 create mode 100644 epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt

diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
index dab5f736a0..3147b869f0 100644
--- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
 
 make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
+
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:59:36
+DATE: 2024-08-29_00:29:47
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
  [UNWEIGHT] Wrote 1 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  102.0811s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5116s
- [COUNTERS] Fortran MEs      ( 1 ) :  101.5694s for     8192 events => throughput is 8.07E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          :  104.7057s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5086s
+ [COUNTERS] Fortran MEs      ( 1 ) :  104.1971s for     8192 events => throughput is 7.86E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  102.0739s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5163s
- [COUNTERS] Fortran MEs      ( 1 ) :  101.5576s for     8192 events => throughput is 8.07E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          :  103.9667s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5128s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.4539s for     8192 events => throughput is 7.92E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1120.7697s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.3745s
- [COUNTERS] Fortran MEs      ( 1 ) : 1116.3951s for    90112 events => throughput is 8.07E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          : 1143.5314s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4515s
+ [COUNTERS] Fortran MEs      ( 1 ) : 1139.0798s for    90112 events => throughput is 7.91E+01 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  122.6268s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5175s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  121.9186s for     8192 events => throughput is 6.72E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1907s
+ [COUNTERS] PROGRAM TOTAL          :  126.8964s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5197s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  126.1682s for     8192 events => throughput is 6.49E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2085s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1388.7153s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.3988s
- [COUNTERS] CudaCpp MEs      ( 2 ) : 1384.1234s for    90112 events => throughput is 6.51E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1931s
+ [COUNTERS] PROGRAM TOTAL          : 1403.2870s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5004s
+ [COUNTERS] CudaCpp MEs      ( 2 ) : 1398.5804s for    90112 events => throughput is 6.44E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2061s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.880201e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.651833e+01                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.389775e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.593965e+01                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   60.8180s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5182s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   60.1993s for     8192 events => throughput is 1.36E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1005s
+ [COUNTERS] PROGRAM TOTAL          :   62.5955s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5183s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   61.9733s for     8192 events => throughput is 1.32E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1039s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  663.6261s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4076s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  659.1171s for    90112 events => throughput is 1.37E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1014s
+ [COUNTERS] PROGRAM TOTAL          :  682.9443s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5013s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  678.3401s for    90112 events => throughput is 1.33E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1029s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.603881e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.542499e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.607115e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.547949e+02                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,10 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   28.7968s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5160s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   28.2344s for     8192 events => throughput is 2.90E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0464s
+ [COUNTERS] PROGRAM TOTAL          :   29.4732s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5195s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   28.9048s for     8192 events => throughput is 2.83E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0489s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  314.6312s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4324s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  310.1525s for    90112 events => throughput is 2.91E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0464s
+ [COUNTERS] PROGRAM TOTAL          :  326.1256s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4980s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  321.5780s for    90112 events => throughput is 2.80E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0495s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.378917e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.348554e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.496128e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.339258e+02                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,10 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   25.3254s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5203s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   24.7644s for     8192 events => throughput is 3.31E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0408s
+ [COUNTERS] PROGRAM TOTAL          :   26.4013s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5189s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   25.8409s for     8192 events => throughput is 3.17E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0416s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -401,10 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  277.9808s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4083s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  273.5305s for    90112 events => throughput is 3.29E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0420s
+ [COUNTERS] PROGRAM TOTAL          :  288.3304s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4909s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  283.7968s for    90112 events => throughput is 3.18E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0426s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.986386e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.889567e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.006448e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.856009e+02                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,10 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   25.0869s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5172s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   24.5238s for     8192 events => throughput is 3.34E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0459s
+ [COUNTERS] PROGRAM TOTAL          :   26.2781s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5185s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   25.7127s for     8192 events => throughput is 3.19E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0470s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -479,10 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  271.0840s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.3948s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  266.6404s for    90112 events => throughput is 3.38E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0489s
+ [COUNTERS] PROGRAM TOTAL          :  288.1563s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5041s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  283.6047s for    90112 events => throughput is 3.18E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0475s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.641160e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.430830e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.622116e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.419526e+02                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :    3.2426s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.0583s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0970s for     8192 events => throughput is 7.47E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0873s
+ [COUNTERS] PROGRAM TOTAL          :    3.2059s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.0215s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0984s for     8192 events => throughput is 7.46E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0860s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :   17.9203s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.9107s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   11.9249s for    90112 events => throughput is 7.56E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0847s
+ [COUNTERS] PROGRAM TOTAL          :   18.0242s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.9891s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   11.9530s for    90112 events => throughput is 7.54E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0821s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.521131e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.504621e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.292650e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.292840e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.241733e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.223985e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.585186e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.586185e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.235154e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.274801e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.473644e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.422438e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.236111e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.222510e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.235762e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.234675e+03                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
new file mode 100644
index 0000000000..d2204615a7
--- /dev/null
+++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
@@ -0,0 +1,615 @@
+Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
+
+make USEBUILDDIR=1 BACKEND=cuda
+
+
+make USEBUILDDIR=1 BACKEND=cppnone
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make USEBUILDDIR=1 BACKEND=cppavx2
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+OMP_NUM_THREADS=
+
+DATE: 2024-08-28_22:56:29
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
+
+*** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+8192 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
+ [UNWEIGHT] Wrote 1 events (found 407 events)
+ [COUNTERS] PROGRAM TOTAL          :  104.2587s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5124s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.7463s for     8192 events => throughput is 7.90E+01 events/s
+
+*** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+8192 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
+ [UNWEIGHT] Wrote 70 events (found 407 events)
+ [COUNTERS] PROGRAM TOTAL          :  104.3674s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5136s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.8538s for     8192 events => throughput is 7.89E+01 events/s
+
+*** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+81920 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 303 events (found 1531 events)
+ [COUNTERS] PROGRAM TOTAL          : 1144.4747s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4497s
+ [COUNTERS] Fortran MEs      ( 1 ) : 1140.0250s for    90112 events => throughput is 7.90E+01 events/s
+
+*** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+8192 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.none_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1
+ [UNWEIGHT] Wrote 70 events (found 407 events)
+ [COUNTERS] PROGRAM TOTAL          :  143.1637s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5170s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  142.4135s for     8192 events => throughput is 5.75E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2331s
+
+*** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939197E-006) differ by less than 3E-14 (1.7763568394002505e-15)
+
+*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical
+
+*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+81920 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.none_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 303 events (found 1531 events)
+ [COUNTERS] PROGRAM TOTAL          : 1584.8411s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4979s
+ [COUNTERS] CudaCpp MEs      ( 2 ) : 1580.1217s for    90112 events => throughput is 5.70E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2215s
+
+*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656006E-007) differ by less than 3E-14 (1.7763568394002505e-15)
+
+*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
+
+*** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 7.064548e+01                 )  sec^-1
+
+*** EXECUTE CHECK(8192) -p 256 32 1 ***
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 7.012515e+01                 )  sec^-1
+
+*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+8192 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.sse4_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
+ [UNWEIGHT] Wrote 70 events (found 407 events)
+ [COUNTERS] PROGRAM TOTAL          :   67.8957s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5203s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   67.2615s for     8192 events => throughput is 1.22E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1138s
+
+*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15)
+
+*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical
+
+*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+81920 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.sse4_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 303 events (found 1531 events)
+ [COUNTERS] PROGRAM TOTAL          :  744.9981s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5014s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  740.3867s for    90112 events => throughput is 1.22E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1100s
+
+*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15)
+
+*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
+
+*** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 1.465109e+02                 )  sec^-1
+
+*** EXECUTE CHECK(8192) -p 256 32 1 ***
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 1.456991e+02                 )  sec^-1
+
+*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+8192 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.avx2_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
+ [UNWEIGHT] Wrote 70 events (found 407 events)
+ [COUNTERS] PROGRAM TOTAL          :   32.8435s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5217s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   32.2689s for     8192 events => throughput is 2.54E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0530s
+
+*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15)
+
+*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical
+
+*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+81920 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.avx2_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 303 events (found 1531 events)
+ [COUNTERS] PROGRAM TOTAL          :  359.2894s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5142s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  354.7232s for    90112 events => throughput is 2.54E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0520s
+
+*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15)
+
+*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
+
+*** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 3.096824e+02                 )  sec^-1
+
+*** EXECUTE CHECK(8192) -p 256 32 1 ***
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 3.124137e+02                 )  sec^-1
+
+*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+8192 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.512y_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
+ [UNWEIGHT] Wrote 70 events (found 407 events)
+ [COUNTERS] PROGRAM TOTAL          :   29.2042s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5176s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   28.6402s for     8192 events => throughput is 2.86E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0464s
+
+*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15)
+
+*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical
+
+*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+81920 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.512y_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 303 events (found 1531 events)
+ [COUNTERS] PROGRAM TOTAL          :  320.6913s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5138s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  316.1312s for    90112 events => throughput is 2.85E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0463s
+
+*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15)
+
+*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
+
+*** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 3.507403e+02                 )  sec^-1
+
+*** EXECUTE CHECK(8192) -p 256 32 1 ***
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 3.509785e+02                 )  sec^-1
+
+*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+8192 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.512z_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
+ [UNWEIGHT] Wrote 70 events (found 407 events)
+ [COUNTERS] PROGRAM TOTAL          :   28.3605s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5210s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   27.7898s for     8192 events => throughput is 2.95E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0497s
+
+*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15)
+
+*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical
+
+*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+81920 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.512z_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 303 events (found 1531 events)
+ [COUNTERS] PROGRAM TOTAL          :  313.4261s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5141s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  308.8625s for    90112 events => throughput is 2.92E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0494s
+
+*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15)
+
+*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
+
+*** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 3.237768e+02                 )  sec^-1
+
+*** EXECUTE CHECK(8192) -p 256 32 1 ***
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 3.237055e+02                 )  sec^-1
+
+*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+8192 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.cuda_d_inlL_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
+ [UNWEIGHT] Wrote 70 events (found 407 events)
+ [COUNTERS] PROGRAM TOTAL          :    3.4515s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.9832s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2329s for     8192 events => throughput is 6.64E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.2354s
+
+*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15)
+
+*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical
+
+*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) ***
+--------------------
+CUDACPP_RUNTIME_FBRIDGEMODE = (not set)
+CUDACPP_RUNTIME_VECSIZEUSED = 8192
+--------------------
+81920 1 1 ! Number of events and max and min iterations
+0.000001 ! Accuracy (ignored because max iterations = min iterations)
+0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present)
+1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement)
+0 ! Helicity Sum/event 0=exact
+1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
+--------------------
+Executing ' ./build.cuda_d_inlL_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+ [OPENMPTH] omp_get_max_threads/nproc = 1/4
+ [NGOODHEL] ngoodhel/ncomb = 128/128
+ [XSECTION] VECSIZE_USED = 8192
+ [XSECTION] MultiChannel = TRUE
+ [XSECTION] Configuration = 1
+ [XSECTION] ChannelId = 1
+ [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 303 events (found 1531 events)
+ [COUNTERS] PROGRAM TOTAL          :   19.6663s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.9649s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   13.4667s for    90112 events => throughput is 6.69E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.2347s
+
+*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
+
+OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993086656006E-007) differ by less than 3E-14 (1.7763568394002505e-15)
+
+*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
+
+OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
+
+*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 6.697997e+03                 )  sec^-1
+
+*** EXECUTE GCHECK(8192) -p 256 32 1 ***
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 9.001430e+03                 )  sec^-1
+
+*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge ***
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 8.490932e+03                 )  sec^-1
+
+*** EXECUTE GCHECK(MAX) -p 512 32 1 ***
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 9.099504e+03                 )  sec^-1
+
+*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge ***
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 8.480979e+03                 )  sec^-1
+
+*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 ***
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 8.864809e+03                 )  sec^-1
+
+*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge ***
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 8.495272e+03                 )  sec^-1
+
+*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 ***
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+EvtsPerSec[MECalcOnly] (3a) = ( 3.066155e+03                 )  sec^-1
+
+*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
+
+TEST COMPLETED

From a4721584893dede1e162b38f8b9f32dcc2d34105 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Thu, 29 Aug 2024 14:03:19 +0200
Subject: [PATCH 29/50] [helas] in ee_mumu.mad, replace CD_ACCESS by CI_ACCESS
 to fix build warnings and runtime test failures in HELINL=0

There are still build failures in HELINL=L
---
 .../cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc  | 16 ++++++++--------
 epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h      | 16 ++++++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
index 43c887914f..bbd47d32e7 100644
--- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
@@ -71,7 +71,7 @@ namespace mg5amcCpu
                  const double Ccoeff,
                  fptype allvertexes[] )
   {
-    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
@@ -86,7 +86,7 @@ namespace mg5amcCpu
                    const fptype W3,
                    fptype allV3[] )
   {
-    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
@@ -100,7 +100,7 @@ namespace mg5amcCpu
                  const double Ccoeff,
                  fptype allvertexes[] )
   {
-    return FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return FFV2_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
@@ -115,7 +115,7 @@ namespace mg5amcCpu
                  const fptype W3,
                  fptype allV3[] )
   {
-    return FFV2_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV2_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
@@ -129,7 +129,7 @@ namespace mg5amcCpu
                  const double Ccoeff,
                  fptype allvertexes[] )
   {
-    return FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return FFV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
@@ -144,7 +144,7 @@ namespace mg5amcCpu
                  const fptype W3,
                  fptype allV3[] )
   {
-    return FFV4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV4_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
@@ -158,7 +158,7 @@ namespace mg5amcCpu
                    const double Ccoeff,
                    fptype allvertexes[] )
   {
-    return FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
@@ -173,7 +173,7 @@ namespace mg5amcCpu
                    const fptype W3,
                    fptype allV3[] )
   {
-    return FFV2_4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV2_4_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
index a392267d1b..0145294ff5 100644
--- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
@@ -1211,14 +1211,14 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_FFV2_0 FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV2_3 FFV2_3<W_ACCESS, CD_ACCESS>
-#define helas_FFV4_0 FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV4_3 FFV4_3<W_ACCESS, CD_ACCESS>
-#define helas_FFV2_4_0 FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV2_4_3 FFV2_4_3<W_ACCESS, CD_ACCESS>
+#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_FFV2_0 FFV2_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_FFV2_3 FFV2_3<W_ACCESS, CI_ACCESS>
+#define helas_FFV4_0 FFV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_FFV4_3 FFV4_3<W_ACCESS, CI_ACCESS>
+#define helas_FFV2_4_0 FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_FFV2_4_3 FFV2_4_3<W_ACCESS, CI_ACCESS>
 
 #else
 

From a2b181051996ded0dfa12a98ac19627ecd1965b5 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Thu, 29 Aug 2024 14:52:47 +0200
Subject: [PATCH 30/50] [helas] in ee_mumu.mad and CODEGEN, add missing
 arguments (allCOUP1, allCOUP2 instead of allCOUP) to FFV2_4_0 and FFV2_4_3,
 fixing build failures in HELINL=L

---
 .../CUDACPP_SA_OUTPUT/model_handling.py       | 21 +++++++++++--------
 .../ee_mumu.mad/SubProcesses/HelAmps.cc       | 16 ++++++++------
 epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h   | 12 +++++++----
 3 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
index 2327596bf4..983b7a01a3 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
@@ -121,11 +121,15 @@ class PLUGIN_ALOHAWriter(aloha_writers.ALOHAWriterForGPU):
     ###nodeclare = False # old behaviour (separate declaration with no initialization)
     nodeclare = True # new behaviour (delayed declaration with initialisation)
 
-    # AV - modify aloha_writers.WriteALOHA method (add a debug printout)
+    # AV - modify/enhance aloha_writers.WriteALOHA method (add a debug printout and add additional outputs)
     def write(self, **opt):
         ###misc.sprint('Entering PLUGIN_ALOHAWriter.write')
-        out = super().write(**opt)
-        return out
+        h_rout, cc_rout = super().write(**opt) # this is a tuple
+        h2a_rout = self.get_header_txt(mode='linker_define1')
+        h2b_rout = self.get_header_txt(mode='linker_define2')
+        h2c_rout = self.get_header_txt(mode='linker_decl')
+        cc2_rout = self.get_header_txt(mode='linker_impl')
+        return ( h_rout, cc_rout, h2a_rout, h2b_rout, h2c_rout, cc2_rout )
 
     # AV - modify aloha_writers.ALOHAWriterForCPP method (improve formatting)
     def change_number_format(self, number):
@@ -175,6 +179,7 @@ def get_header_txt(self, name=None, couplings=None, mode=''):
             - function tag
             - definition of variable
         """
+        ###misc.sprint('get_header_txt',mode)
         if name is None:
             name = self.name
         if mode=='':
@@ -187,6 +192,7 @@ def get_header_txt(self, name=None, couplings=None, mode=''):
         argnames = []
         comment_inputs = [] # AV
         for format, argname in self.define_argument_list(couplings):
+            ###misc.sprint(format, argname) # note: for ee_mumu this already includes COUP1 and COUP2 separately
             if format.startswith('list'):
                 type = self.type2def[format[5:]] # double or complex (instead of list_double or list_complex)
                 comment_inputs.append('%s[6]'%argname) # AV (wavefuncsize=6 is hardcoded also in export_cpp...)
@@ -1107,18 +1113,15 @@ def write_aloha_routines(self):
         else:
             aloha_model.compute_all(save=False, custom_propa=True)
         for abstracthelas in dict(aloha_model).values():
+            ###misc.sprint(type(abstracthelas), abstracthelas.name) # AV this is the loop on FFV functions
             print(type(abstracthelas), abstracthelas.name) # AV this is the loop on FFV functions
-            h_rout, cc_rout = abstracthelas.write(output_dir=None, language=self.aloha_writer, mode='no_include') # AV this eventually calls PLUGIN_ALOHAWriter.write
+            ###h_rout, cc_rout = abstracthelas.write(output_dir=None, language=self.aloha_writer, mode='no_include') # AV this eventually calls PLUGIN_ALOHAWriter.write
+            h_rout, cc_rout, h2a_rout, h2b_rout, h2c_rout, cc2_rout = abstracthelas.write(output_dir=None, language=self.aloha_writer, mode='no_include')
             template_h_files.append(h_rout)
             template_cc_files.append(cc_rout)
-            writer2 = aloha_writers.WriterFactory(abstracthelas, self.aloha_writer, None, abstracthelas.tag) # AV as in create_aloha.AbstractRoutine,write
-            h2a_rout = writer2.get_header_txt(mode='linker_define1')
-            h2b_rout = writer2.get_header_txt(mode='linker_define2')
-            h2c_rout = writer2.get_header_txt(mode='linker_decl')
             template_h2a_files.append(h2a_rout)
             template_h2b_files.append(h2b_rout)
             template_h2c_files.append(h2c_rout)
-            cc2_rout = writer2.get_header_txt(mode='linker_impl')
             template_cc2_files.append(cc2_rout)
         replace_dict['function_declarations'] = '\n'.join(template_h_files)
         replace_dict['function_definitions'] = '\n'.join(template_cc_files)
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
index bbd47d32e7..af338cacc4 100644
--- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
@@ -154,11 +154,13 @@ namespace mg5amcCpu
   linker_FFV2_4_0( const fptype allF1[],
                    const fptype allF2[],
                    const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
+                   const fptype allCOUP1[],
+                   const double Ccoeff1,
+                   const fptype allCOUP2[],
+                   const double Ccoeff2,
                    fptype allvertexes[] )
   {
-    return FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP1, Ccoeff1, allCOUP2, Ccoeff2, allvertexes );
   }
 
   //--------------------------------------------------------------------------
@@ -167,13 +169,15 @@ namespace mg5amcCpu
   __device__ void
   linker_FFV2_4_3( const fptype allF1[],
                    const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
+                   const fptype allCOUP1[],
+                   const double Ccoeff1,
+                   const fptype allCOUP2[],
+                   const double Ccoeff2,
                    const fptype M3,
                    const fptype W3,
                    fptype allV3[] )
   {
-    return FFV2_4_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV2_4_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP1, Ccoeff1, allCOUP2, Ccoeff2, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
index 0145294ff5..dbb94120ef 100644
--- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
@@ -1307,8 +1307,10 @@ namespace mg5amcCpu
   linker_FFV2_4_0( const fptype allF1[],
                    const fptype allF2[],
                    const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
+                   const fptype allCOUP1[],
+                   const double Ccoeff1,
+                   const fptype allCOUP2[],
+                   const double Ccoeff2,
                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
@@ -1317,8 +1319,10 @@ namespace mg5amcCpu
   __device__ void
   linker_FFV2_4_3( const fptype allF1[],
                    const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
+                   const fptype allCOUP1[],
+                   const double Ccoeff1,
+                   const fptype allCOUP2[],
+                   const double Ccoeff2,
                    const fptype M3,
                    const fptype W3,
                    fptype allV3[] );

From 4a00c9c3a16529d3250e8c59f3abcc95b36dc980 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Thu, 29 Aug 2024 16:53:40 +0200
Subject: [PATCH 31/50] [helas] in CODEGEN, add pairs of helas and linker
 functions for CD and CI access, to fix the issues observed in ee_mumu

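I did not find an easier way to do this, because the model is known in the aloha caller but not at the time of aloha codegen: get_header_txt therefore emits both a CD (aS-dependent couplings) and a CI (aS-independent couplings) variant of each helas/linker function, and generate_helas_call picks the matching helas_CD_ or helas_CI_ name for each call.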
---
 .../CUDACPP_SA_OUTPUT/model_handling.py       | 82 +++++++++++--------
 1 file changed, 47 insertions(+), 35 deletions(-)

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
index 983b7a01a3..7ec2f0af3e 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
@@ -222,7 +222,8 @@ def get_header_txt(self, name=None, couplings=None, mode=''):
             outputname = 'allvertexes'
             comment_output = 'amplitude \'vertex\''
             template = '  template<class W_ACCESS, class A_ACCESS, class C_ACCESS>\n'
-            template_define1 = '<W_ACCESS, A_ACCESS, CD_ACCESS>'
+            template_defineCD = '<W_ACCESS, A_ACCESS, CD_ACCESS>'
+            template_defineCI = '<W_ACCESS, A_ACCESS, CI_ACCESS>'
         else:
             output = '%(doublec)s all%(spin)s%(id)d[]' % {
                      'doublec': self.type2def['double'],
@@ -234,38 +235,46 @@ def get_header_txt(self, name=None, couplings=None, mode=''):
             ###self.declaration.add(('list_complex', output)) # AV BUG FIX - THIS IS NOT NEEDED AND IS WRONG (adds name 'cxtype_sv V3[]')
             comment_output = 'wavefunction \'%s%d[6]\'' % ( self.particles[self.outgoing -1], self.outgoing ) # AV (wavefuncsize=6)
             template = '  template<class W_ACCESS, class C_ACCESS>\n'
-            template_define1 = '<W_ACCESS, CD_ACCESS>'
+            template_defineCD = '<W_ACCESS, CD_ACCESS>'
+            template_defineCI = '<W_ACCESS, CI_ACCESS>'
         if 'linker' in mode: template = ''
-        comment = '// Compute the output %s from the input wavefunctions %s' % ( comment_output, ', '.join(comment_inputs) ) # AV
-        if 'linker_decl' in mode or 'linker_impl' in mode : name2 = 'linker_' + name
-        else: name2 = name
-        indent = ' ' * len( '  %s( ' % name2 )
-        if not 'linker_define' in mode :
-            out.write('  %(comment)s\n%(template)s  %(prefix)s void\n  %(name)s( const %(args)s,\n%(indent)s%(output)s )%(suffix)s' %
-                      {'comment': comment, # AV - add comment
-                       'template': template, # AV - add template
-                       'prefix': self.prefix + ( ' INLINE' if 'is_h' in mode else '' ), # AV - add INLINE
-                       'suffix': ( ' ALWAYS_INLINE' if 'is_h' in mode else '' ), # AV - add ALWAYS_INLINE
-                       'indent':indent, 'output':output, 'name': name2,
-                       'args': (',\n' + indent + 'const ').join(args)}) # AV - add const, add indent
-            if 'is_h' in mode or 'linker_decl' in mode:
-                out.write(';\n')
-                out.write('\n  //--------------------------------------------------------------------------\n') # AV add footer
-            elif 'linker_impl' in mode:
-                out.write('\n  {\n')
-                out.write('    return %(name)s%(template)s( %(args)s, %(output)s );' %
-                          {'name': name,
-                           'template': template_define1,
-                           'output': outputname, 
-                           'args': ', '.join(argnames)})
-                out.write('\n  }\n')
-                out.write('\n  //--------------------------------------------------------------------------\n') # AV add footer
-            else:
-                out.write('\n  {\n') # AV
-        elif 'linker_define1' in mode :
-            out.write('#define helas_%s %s%s'%(name,name,template_define1))
+        comment = '  // Compute the output %s from the input wavefunctions %s' % ( comment_output, ', '.join(comment_inputs) )
+        if 'linker_define1' in mode :
+            out.write('#define helas_CD_%s %s%s\n'%(name,name,template_defineCD))
+            out.write('#define helas_CI_%s %s%s'%(name,name,template_defineCI))
+        elif 'linker_define2' in mode :
+            out.write('#define helas_CD_%s linker_CD_%s\n'%(name,name))
+            out.write('#define helas_CI_%s linker_CI_%s'%(name,name))
         else:
-            out.write('#define helas_%s linker_%s'%(name,name))
+            if 'linker_decl' in mode or 'linker_impl' in mode :
+                names2 = ( 'linker_CD_' + name, 'linker_CI_' + name )
+                comments2 = ( comment + ' (dependent couplings)', '\n' + comment + ' (independent couplings)' )
+            else:
+                names2 = ( name, )
+                comments2 = ( comment, )
+            indent = ' ' * len( '  %s( ' % names2[0] )
+            for i, name2 in enumerate(names2):
+                out.write('%(comment)s\n%(template)s  %(prefix)s void\n  %(name)s( const %(args)s,\n%(indent)s%(output)s )%(suffix)s' %
+                          {'comment': comments2[i],
+                           'template': template,
+                           'prefix': self.prefix + ( ' INLINE' if 'is_h' in mode else '' ),
+                           'suffix': ( ' ALWAYS_INLINE' if 'is_h' in mode else '' ),
+                           'indent':indent, 'output':output, 'name': name2,
+                           'args': (',\n' + indent + 'const ').join(args)})
+                if 'is_h' in mode or 'linker_decl' in mode:
+                    out.write(';\n')
+                    out.write('\n  //--------------------------------------------------------------------------\n')
+                elif 'linker_impl' in mode:
+                    out.write('\n  {\n')
+                    out.write('    return %(name)s%(template)s( %(args)s, %(output)s );' %
+                              {'name': name,
+                               'template': template_defineCD if i == 0 else template_defineCI,
+                               'output': outputname, 
+                               'args': ', '.join(argnames)})
+                    out.write('\n  }\n')
+                    out.write('\n  //--------------------------------------------------------------------------\n')
+                else:
+                    out.write('\n  {\n')
         return out.getvalue()
 
     # AV - modify aloha_writers.ALOHAWriterForCPP method (improve formatting)
@@ -2210,12 +2219,15 @@ def generate_helas_call(self, argument):
                             if usesdepcoupl is None: usesdepcoupl = False
                             elif usesdepcoupl: raise Exception('PANIC! this call seems to use both aS-dependent and aS-independent couplings?')
             # AV FOR PR #434: CI_ACCESS for independent couplings and CD_ACCESS for dependent couplings
-            if usesdepcoupl is None: raise Exception('PANIC! could not determine if this call uses aS-dependent or aS-independent couplings?')
-            elif usesdepcoupl: caccess = 'CD_ACCESS'
-            else: caccess = 'CI_ACCESS'
+            ###if usesdepcoupl is None: raise Exception('PANIC! could not determine if this call uses aS-dependent or aS-independent couplings?')
+            ###elif usesdepcoupl: caccess = 'CD_ACCESS'
+            ###else: caccess = 'CI_ACCESS'
             ###if arg['routine_name'].endswith( '_0' ) : arg['routine_name'] += '<W_ACCESS, A_ACCESS, %s>'%caccess
             ###else : arg['routine_name'] += '<W_ACCESS, %s>'%caccess
-            arg['routine_name'] = 'helas_' + arg['routine_name']
+            if usesdepcoupl is None: raise Exception('PANIC! could not determine if this call uses aS-dependent or aS-independent couplings?')
+            elif usesdepcoupl: caccess = 'CD_'
+            else: caccess = 'CI_'
+            arg['routine_name'] = 'helas_' + caccess + arg['routine_name']
             if isinstance(argument, helas_objects.HelasWavefunction):
                 #arg['out'] = 'w_sv[%(out)d]'
                 arg['out'] = 'w_fp[%(out)d]'

From a07b914b0318c5f0e110c927fcafae1a868054cf Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Thu, 29 Aug 2024 17:26:16 +0200
Subject: [PATCH 32/50] [helas] regenerate ee_mumu.mad, with the dual series of
 CD_ACCESS and CI_ACCESS

---
 .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt   |  22 +-
 .../ee_mumu.mad/SubProcesses/HelAmps.cc       | 248 +++++++++++-----
 .../SubProcesses/P1_epem_mupmum/CPPProcess.cc |   8 +-
 epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h   | 272 ++++++++++++------
 4 files changed, 391 insertions(+), 159 deletions(-)

diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
index 0676f497e9..c858412080 100644
--- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
@@ -62,7 +62,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0057065486907958984 [0m
+[1;32mDEBUG: model prefixing  takes 0.005822181701660156 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -176,8 +176,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Processing color information for process: e+ e- > mu+ mu- @1 
 INFO: Creating files in directory P1_epem_mupmum 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fa457e6f550> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fec058d6cd0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -194,22 +194,22 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group epem_mupmum 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
 Wrote files for 8 helas calls in 0.119 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
-ALOHA: aloha creates 3 routines in  0.212 s
+ALOHA: aloha creates 3 routines in  0.208 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 7 routines in  0.272 s
+ALOHA: aloha creates 7 routines in  0.266 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -252,9 +252,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.169s
-user	0m1.894s
-sys	0m0.268s
+real	0m2.165s
+user	0m1.891s
+sys	0m0.269s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
index af338cacc4..fd0e6a6605 100644
--- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/HelAmps.cc
@@ -62,120 +62,240 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
   {
     return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV2_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV2_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] )
+  {
+    return FFV2_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_FFV2_3( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allV3[] )
+  linker_CI_FFV2_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] )
   {
     return FFV2_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV4_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV4_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV4_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV4_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] )
+  {
+    return FFV4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_FFV4_3( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allV3[] )
+  linker_CI_FFV4_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] )
   {
     return FFV4_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV2_4_0( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allV3[],
-                   const fptype allCOUP1[],
-                   const double Ccoeff1,
-                   const fptype allCOUP2[],
-                   const double Ccoeff2,
-                   fptype allvertexes[] )
+  linker_CD_FFV2_4_0( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allV3[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      fptype allvertexes[] )
+  {
+    return FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP1, Ccoeff1, allCOUP2, Ccoeff2, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_4_0( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allV3[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      fptype allvertexes[] )
   {
     return FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP1, Ccoeff1, allCOUP2, Ccoeff2, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_4_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV2_4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP1, Ccoeff1, allCOUP2, Ccoeff2, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_FFV2_4_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP1[],
-                   const double Ccoeff1,
-                   const fptype allCOUP2[],
-                   const double Ccoeff2,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV2_4_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
   {
     return FFV2_4_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP1, Ccoeff1, allCOUP2, Ccoeff2, M3, W3, allV3 );
   }
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc
index 284269a9d8..6bdfc7e4a0 100644
--- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc
@@ -332,10 +332,10 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 );
 
-      helas_FFV1P0_3( w_fp[1], w_fp[0], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[4] );
+      helas_CI_FFV1P0_3( w_fp[1], w_fp[0], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
+      helas_CI_FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -345,10 +345,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 2 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV2_4_3( w_fp[1], w_fp[0], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CI_FFV2_4_3( w_fp[1], w_fp[0], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, &amp_fp[0] );
+      helas_CI_FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
index dbb94120ef..e8ad1c7a51 100644
--- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
@@ -1211,121 +1211,233 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
-#define helas_FFV2_0 FFV2_0<W_ACCESS, A_ACCESS, CI_ACCESS>
-#define helas_FFV2_3 FFV2_3<W_ACCESS, CI_ACCESS>
-#define helas_FFV4_0 FFV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
-#define helas_FFV4_3 FFV4_3<W_ACCESS, CI_ACCESS>
-#define helas_FFV2_4_0 FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
-#define helas_FFV2_4_3 FFV2_4_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV2_0 FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV2_0 FFV2_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV2_3 FFV2_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV2_3 FFV2_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV4_0 FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV4_0 FFV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV4_3 FFV4_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV4_3 FFV4_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV2_4_0 FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV2_4_0 FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV2_4_3 FFV2_4_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV2_4_3 FFV2_4_3<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_FFV2_0 linker_FFV2_0
-#define helas_FFV2_3 linker_FFV2_3
-#define helas_FFV4_0 linker_FFV4_0
-#define helas_FFV4_3 linker_FFV4_3
-#define helas_FFV2_4_0 linker_FFV2_4_0
-#define helas_FFV2_4_3 linker_FFV2_4_3
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_FFV2_0 linker_CD_FFV2_0
+#define helas_CI_FFV2_0 linker_CI_FFV2_0
+#define helas_CD_FFV2_3 linker_CD_FFV2_3
+#define helas_CI_FFV2_3 linker_CI_FFV2_3
+#define helas_CD_FFV4_0 linker_CD_FFV4_0
+#define helas_CI_FFV4_0 linker_CI_FFV4_0
+#define helas_CD_FFV4_3 linker_CD_FFV4_3
+#define helas_CI_FFV4_3 linker_CI_FFV4_3
+#define helas_CD_FFV2_4_0 linker_CD_FFV2_4_0
+#define helas_CI_FFV2_4_0 linker_CI_FFV2_4_0
+#define helas_CD_FFV2_4_3 linker_CD_FFV2_4_3
+#define helas_CI_FFV2_4_3 linker_CI_FFV2_4_3
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_FFV2_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_FFV2_3( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allV3[] );
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV4_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_FFV2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV4_3( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allV3[] );
+  linker_CI_FFV2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_FFV2_4_0( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allV3[],
-                   const fptype allCOUP1[],
-                   const double Ccoeff1,
-                   const fptype allCOUP2[],
-                   const double Ccoeff2,
-                   fptype allvertexes[] );
+  linker_CD_FFV2_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV4_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV4_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV4_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV4_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_4_0( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allV3[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_4_0( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allV3[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_4_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_FFV2_4_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP1[],
-                   const double Ccoeff1,
-                   const fptype allCOUP2[],
-                   const double Ccoeff2,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV2_4_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 

From 718a84e5187fc9cd0ec55c0ea45c5a87c56c7ce3 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Thu, 29 Aug 2024 17:36:09 +0200
Subject: [PATCH 33/50] [helas] regenerate all processes after fixing the two
 eemumu issues (one, COUP1/COUP2 instead of COUP; two, CI/CD instead of CD)

---
 .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt   |   16 +-
 .../CODEGEN_cudacpp_ee_mumu_log.txt           |   10 +-
 .../ee_mumu.sa/SubProcesses/HelAmps.cc        |  248 +-
 .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc     |    8 +-
 epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h    |  268 +-
 .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt       |   22 +-
 .../cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc |  121 +-
 .../SubProcesses/P1_gg_ttx/CPPProcess.cc      |   12 +-
 epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h     |  133 +-
 .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt    |   12 +-
 .../cudacpp/gg_tt.sa/SubProcesses/HelAmps.cc  |  121 +-
 .../P1_Sigma_sm_gg_ttx/CPPProcess.cc          |   12 +-
 epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h      |  133 +-
 .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt |   34 +-
 .../gg_tt01g.mad/SubProcesses/HelAmps.cc      |  270 +-
 .../SubProcesses/P1_gg_ttx/CPPProcess.cc      |   12 +-
 .../SubProcesses/P2_gg_ttxg/CPPProcess.cc     |   62 +-
 epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h  |  283 +-
 .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt     |   28 +-
 .../gg_ttg.mad/SubProcesses/HelAmps.cc        |  270 +-
 .../SubProcesses/P1_gg_ttxg/CPPProcess.cc     |   62 +-
 epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h    |  283 +-
 .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt  |   12 +-
 .../cudacpp/gg_ttg.sa/SubProcesses/HelAmps.cc |  270 +-
 .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc         |   62 +-
 epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h     |  283 +-
 .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt   |   26 +-
 .../gg_ttgg.mad/SubProcesses/HelAmps.cc       |  375 +-
 .../SubProcesses/P1_gg_ttxgg/CPPProcess.cc    |  432 +-
 epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h   |  395 +-
 .../CODEGEN_cudacpp_gg_ttgg_log.txt           |   14 +-
 .../gg_ttgg.sa/SubProcesses/HelAmps.cc        |  375 +-
 .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc        |  432 +-
 epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h    |  395 +-
 .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt |   28 +-
 .../gg_ttggg.mad/SubProcesses/HelAmps.cc      |  375 +-
 .../SubProcesses/P1_gg_ttxggg/CPPProcess.cc   | 4548 ++++++++---------
 epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h  |  395 +-
 .../CODEGEN_cudacpp_gg_ttggg_log.txt          |   14 +-
 .../gg_ttggg.sa/SubProcesses/HelAmps.cc       |  375 +-
 .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc       | 4548 ++++++++---------
 epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h   |  395 +-
 .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt     |   36 +-
 .../gq_ttq.mad/SubProcesses/HelAmps.cc        |  149 +-
 .../SubProcesses/P1_gu_ttxu/CPPProcess.cc     |   22 +-
 .../SubProcesses/P1_gux_ttxux/CPPProcess.cc   |   22 +-
 epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h    |  164 +-
 .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt  |   10 +-
 .../cudacpp/gq_ttq.sa/SubProcesses/HelAmps.cc |  149 +-
 .../P1_Sigma_sm_gu_ttxu/CPPProcess.cc         |   22 +-
 .../P1_Sigma_sm_gux_ttxux/CPPProcess.cc       |   22 +-
 epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h     |  164 +-
 .../CODEGEN_mad_heft_gg_bb_log.txt            |   22 +-
 .../heft_gg_bb.mad/SubProcesses/HelAmps.cc    |  180 +-
 .../SubProcesses/P1_gg_bbx/CPPProcess.cc      |   16 +-
 .../cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h |  198 +-
 .../CODEGEN_cudacpp_heft_gg_bb_log.txt        |    6 +-
 .../heft_gg_bb.sa/SubProcesses/HelAmps.cc     |  180 +-
 .../P1_Sigma_heft_gg_bbx/CPPProcess.cc        |   16 +-
 .../cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h  |  198 +-
 .../CODEGEN_mad_pp_tt012j_log.txt             |  200 +-
 .../pp_tt012j.mad/SubProcesses/HelAmps.cc     |  375 +-
 .../SubProcesses/P0_gg_ttx/CPPProcess.cc      |   12 +-
 .../SubProcesses/P0_uux_ttx/CPPProcess.cc     |    4 +-
 .../SubProcesses/P1_gg_ttxg/CPPProcess.cc     |   62 +-
 .../SubProcesses/P1_gu_ttxu/CPPProcess.cc     |   22 +-
 .../SubProcesses/P1_gux_ttxux/CPPProcess.cc   |   22 +-
 .../SubProcesses/P1_uux_ttxg/CPPProcess.cc    |   22 +-
 .../SubProcesses/P2_gg_ttxgg/CPPProcess.cc    |  432 +-
 .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc   |  134 +-
 .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc    |  134 +-
 .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc  |  134 +-
 .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc    |   32 +-
 .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc  |   32 +-
 .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc    |   60 +-
 .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc  |   32 +-
 .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc   |  134 +-
 .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc  |   60 +-
 .../P2_uxcx_ttxuxcx/CPPProcess.cc             |   32 +-
 .../P2_uxux_ttxuxux/CPPProcess.cc             |   60 +-
 epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h |  395 +-
 .../CODEGEN_mad_smeft_gg_tttt_log.txt         |   22 +-
 .../smeft_gg_tttt.mad/SubProcesses/HelAmps.cc |  273 +-
 .../SubProcesses/P1_gg_ttxttx/CPPProcess.cc   |  226 +-
 .../SubProcesses/P1_gg_ttxttx/matrix1.pdf     |  Bin 375126 -> 375126 bytes
 .../HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h    |  300 +-
 .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt     |   14 +-
 .../smeft_gg_tttt.sa/SubProcesses/HelAmps.cc  |  273 +-
 .../CPPProcess.cc                             |  226 +-
 .../HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h    |  300 +-
 .../CODEGEN_mad_susy_gg_t1t1_log.txt          |   22 +-
 .../susy_gg_t1t1.mad/SubProcesses/HelAmps.cc  |  152 +-
 .../SubProcesses/P1_gg_t1t1x/CPPProcess.cc    |   24 +-
 .../susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h |  167 +-
 .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt      |   10 +-
 .../susy_gg_t1t1.sa/SubProcesses/HelAmps.cc   |  152 +-
 .../CPPProcess.cc                             |   24 +-
 .../susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h  |  167 +-
 .../CODEGEN_mad_susy_gg_tt_log.txt            |   22 +-
 .../susy_gg_tt.mad/SubProcesses/HelAmps.cc    |  121 +-
 .../SubProcesses/P1_gg_ttx/CPPProcess.cc      |   12 +-
 .../susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h   |  133 +-
 .../CODEGEN_cudacpp_susy_gg_tt_log.txt        |   10 +-
 .../susy_gg_tt.sa/SubProcesses/HelAmps.cc     |  121 +-
 .../P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc  |   12 +-
 .../susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h    |  133 +-
 106 files changed, 13854 insertions(+), 9197 deletions(-)

diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
index c858412080..88af428730 100644
--- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
@@ -62,7 +62,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005822181701660156 [0m
+[1;32mDEBUG: model prefixing  takes 0.005793333053588867 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -177,7 +177,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1
 INFO: Processing color information for process: e+ e- > mu+ mu- @1 
 INFO: Creating files in directory P1_epem_mupmum 
 [1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fec058d6cd0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fddd6802550> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -198,18 +198,18 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1625][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
-Wrote files for 8 helas calls in 0.119 s
+Wrote files for 8 helas calls in 0.118 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
-ALOHA: aloha creates 3 routines in  0.208 s
+ALOHA: aloha creates 3 routines in  0.207 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 7 routines in  0.266 s
+ALOHA: aloha creates 7 routines in  0.262 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -252,9 +252,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.165s
-user	0m1.891s
-sys	0m0.269s
+real	0m2.143s
+user	0m1.874s
+sys	0m0.265s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
index e54e839724..5ca2772f5f 100644
--- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
@@ -62,7 +62,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005738258361816406 [0m
+[1;32mDEBUG: model prefixing  takes 0.005778312683105469 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes.
 INFO: Please specify coupling orders to bypass this step. 
 INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1  
 INFO: Process has 2 diagrams 
-1 processes with 2 diagrams generated in 0.004 s
+1 processes with 2 diagrams generated in 0.005 s
 Total: 1 processes with 2 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu
 Load PLUGIN.CUDACPP_OUTPUT
@@ -205,6 +205,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu
 quit
 
 real	0m0.686s
-user	0m0.624s
-sys	0m0.053s
-Code generation completed in 1 seconds
+user	0m0.625s
+sys	0m0.055s
+Code generation completed in 0 seconds
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/HelAmps.cc
index 43c887914f..fd0e6a6605 100644
--- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/HelAmps.cc
@@ -62,118 +62,242 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
   {
     return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV2_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV2_3( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allV3[] )
+  linker_CI_FFV2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV2_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] )
   {
     return FFV2_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] )
+  {
+    return FFV2_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV4_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV4_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV4_3( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allV3[] )
+  linker_CI_FFV4_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV4_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] )
   {
     return FFV4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV4_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] )
+  {
+    return FFV4_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_4_0( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allV3[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      fptype allvertexes[] )
+  {
+    return FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP1, Ccoeff1, allCOUP2, Ccoeff2, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_4_0( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allV3[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      fptype allvertexes[] )
+  {
+    return FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP1, Ccoeff1, allCOUP2, Ccoeff2, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_FFV2_4_0( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   fptype allvertexes[] )
+  linker_CD_FFV2_4_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
   {
-    return FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return FFV2_4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP1, Ccoeff1, allCOUP2, Ccoeff2, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_FFV2_4_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV2_4_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
   {
-    return FFV2_4_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV2_4_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP1, Ccoeff1, allCOUP2, Ccoeff2, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc
index 7835cfcc44..b3b722c129 100644
--- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc
+++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc
@@ -332,10 +332,10 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 );
 
-      helas_FFV1P0_3( w_fp[1], w_fp[0], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[4] );
+      helas_CI_FFV1P0_3( w_fp[1], w_fp[0], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
+      helas_CI_FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -344,10 +344,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 2 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV2_4_3( w_fp[1], w_fp[0], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CI_FFV2_4_3( w_fp[1], w_fp[0], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, &amp_fp[0] );
+      helas_CI_FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h
index a392267d1b..e8ad1c7a51 100644
--- a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h
@@ -1211,117 +1211,233 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_FFV2_0 FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV2_3 FFV2_3<W_ACCESS, CD_ACCESS>
-#define helas_FFV4_0 FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV4_3 FFV4_3<W_ACCESS, CD_ACCESS>
-#define helas_FFV2_4_0 FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV2_4_3 FFV2_4_3<W_ACCESS, CD_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV2_0 FFV2_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV2_0 FFV2_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV2_3 FFV2_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV2_3 FFV2_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV4_0 FFV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV4_0 FFV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV4_3 FFV4_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV4_3 FFV4_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV2_4_0 FFV2_4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV2_4_0 FFV2_4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV2_4_3 FFV2_4_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV2_4_3 FFV2_4_3<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_FFV2_0 linker_FFV2_0
-#define helas_FFV2_3 linker_FFV2_3
-#define helas_FFV4_0 linker_FFV4_0
-#define helas_FFV4_3 linker_FFV4_3
-#define helas_FFV2_4_0 linker_FFV2_4_0
-#define helas_FFV2_4_3 linker_FFV2_4_3
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_FFV2_0 linker_CD_FFV2_0
+#define helas_CI_FFV2_0 linker_CI_FFV2_0
+#define helas_CD_FFV2_3 linker_CD_FFV2_3
+#define helas_CI_FFV2_3 linker_CI_FFV2_3
+#define helas_CD_FFV4_0 linker_CD_FFV4_0
+#define helas_CI_FFV4_0 linker_CI_FFV4_0
+#define helas_CD_FFV4_3 linker_CD_FFV4_3
+#define helas_CI_FFV4_3 linker_CI_FFV4_3
+#define helas_CD_FFV2_4_0 linker_CD_FFV2_4_0
+#define helas_CI_FFV2_4_0 linker_CI_FFV2_4_0
+#define helas_CD_FFV2_4_3 linker_CD_FFV2_4_3
+#define helas_CI_FFV2_4_3 linker_CI_FFV2_4_3
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_FFV2_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_FFV2_3( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allV3[] );
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV4_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_FFV2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV4_3( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allV3[] );
+  linker_CI_FFV2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_FFV2_4_0( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   fptype allvertexes[] );
+  linker_CD_FFV2_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV4_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV4_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV4_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV4_3( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_4_0( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allV3[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_4_0( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allV3[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_4_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_FFV2_4_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV2_4_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP1[],
+                      const double Ccoeff1,
+                      const fptype allCOUP2[],
+                      const double Ccoeff2,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
index a1c9fae589..f321315aec 100644
--- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005675554275512695 [0m
+[1;32mDEBUG: model prefixing  takes 0.005693197250366211 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fdad2083e80> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fcbba8dde80> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -194,19 +194,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.121 s
+Wrote files for 10 helas calls in 0.120 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.150 s
+ALOHA: aloha creates 2 routines in  0.152 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.136 s
+ALOHA: aloha creates 4 routines in  0.138 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -244,8 +244,8 @@ Run "open index.html" to see more information about this process.
 quit
 
 real	0m1.975s
-user	0m1.709s
-sys	0m0.264s
+user	0m1.699s
+sys	0m0.276s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc
index 79486e92f0..426da5a2c2 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/HelAmps.cc
@@ -62,63 +62,122 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
     return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
index 576ea7cb4d..3dfd554481 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
@@ -332,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -346,10 +346,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -359,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
index 574dd3755c..6e216e2406 100644
--- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
@@ -1036,64 +1036,119 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
index 3d103ae0db..36e8aed83f 100644
--- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005746364593505859 [0m
+[1;32mDEBUG: model prefixing  takes 0.005682468414306641 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.008 s
+1 processes with 3 diagrams generated in 0.009 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -182,7 +182,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.152 s
+ALOHA: aloha creates 2 routines in  0.149 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -199,7 +199,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
 quit
 
-real	0m0.560s
-user	0m0.499s
-sys	0m0.055s
+real	0m0.559s
+user	0m0.504s
+sys	0m0.051s
 Code generation completed in 0 seconds
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/HelAmps.cc
index 79486e92f0..426da5a2c2 100644
--- a/epochX/cudacpp/gg_tt.sa/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/HelAmps.cc
@@ -62,63 +62,122 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
     return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc
index cda3e64ada..d64c26249c 100644
--- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc
@@ -332,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -345,10 +345,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -357,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h
index 574dd3755c..6e216e2406 100644
--- a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h
@@ -1036,64 +1036,119 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
index 6f11cac977..ee9cfb416f 100644
--- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
+++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005694866180419922 [0m
+[1;32mDEBUG: model prefixing  takes 0.005636453628540039 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -187,8 +187,8 @@ INFO: Processing color information for process: g g > t t~ g @2
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P2_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fad7ad3f970> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fa48afd9970> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -204,12 +204,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fad7ad3fac0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fa48afd9ac0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -225,25 +225,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s
-Wrote files for 46 helas calls in 0.281 s
+Wrote files for 46 helas calls in 0.280 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.339 s
+ALOHA: aloha creates 5 routines in  0.338 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.320 s
+ALOHA: aloha creates 10 routines in  0.768 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -290,9 +290,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.743s
-user	0m2.420s
-sys	0m0.321s
+real	0m3.190s
+user	0m2.460s
+sys	0m0.288s
 Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/HelAmps.cc
index fbad4a8555..7383a7f24f 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/HelAmps.cc
@@ -62,140 +62,276 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+    return VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CD_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
+  linker_CD_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
index 576ea7cb4d..3dfd554481 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
@@ -332,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -346,10 +346,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -359,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc
index 3714bf4dce..82fe2d6567 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc
@@ -334,11 +334,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -351,10 +351,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 16 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 16 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -379,11 +379,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 16 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -393,10 +393,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 16 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -410,7 +410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -420,11 +420,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 16 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -437,7 +437,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -451,7 +451,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -461,10 +461,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 16 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -478,7 +478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -492,7 +492,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      helas_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -508,7 +508,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -521,7 +521,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -534,7 +534,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -547,22 +547,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 16 ***
 
       // Wavefunction(s) for diagram number 16
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
diff --git a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h
index 5efae129bb..3663a394da 100644
--- a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h
@@ -1275,136 +1275,263 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1_0 linker_VVV1_0
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVVV1P0_1 linker_VVVV1P0_1
-#define helas_VVVV3P0_1 linker_VVVV3P0_1
-#define helas_VVVV4P0_1 linker_VVVV4P0_1
+#define helas_CD_VVV1_0 linker_CD_VVV1_0
+#define helas_CI_VVV1_0 linker_CI_VVV1_0
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVVV1P0_1 linker_CD_VVVV1P0_1
+#define helas_CI_VVVV1P0_1 linker_CI_VVVV1P0_1
+#define helas_CD_VVVV3P0_1 linker_CD_VVVV3P0_1
+#define helas_CI_VVVV3P0_1 linker_CI_VVVV3P0_1
+#define helas_CD_VVVV4P0_1 linker_CD_VVVV4P0_1
+#define helas_CI_VVVV4P0_1 linker_CI_VVVV4P0_1
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CD_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
+  linker_CD_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
index 2635bfe901..401f85e77f 100644
--- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005773782730102539 [0m
+[1;32mDEBUG: model prefixing  takes 0.005800962448120117 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.022 s
+1 processes with 16 diagrams generated in 0.023 s
 Total: 1 processes with 16 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Processing color information for process: g g > t t~ g @1 
 INFO: Creating files in directory P1_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f789b931b50> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f4503b43b50> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -194,11 +194,11 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1614][0m [0m
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s
-Wrote files for 36 helas calls in 0.171 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1626][0m [0m
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s
+Wrote files for 36 helas calls in 0.169 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
@@ -212,7 +212,7 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.322 s
+ALOHA: aloha creates 10 routines in  0.323 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -255,10 +255,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.549s
-user	0m2.267s
-sys	0m0.281s
-Code generation completed in 3 seconds
+real	0m2.547s
+user	0m2.261s
+sys	0m0.286s
+Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/HelAmps.cc
index fbad4a8555..7383a7f24f 100644
--- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/HelAmps.cc
@@ -62,140 +62,276 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+    return VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CD_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
+  linker_CD_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
index 3fa4e019da..6ff382a87e 100644
--- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
@@ -334,11 +334,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -351,10 +351,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 16 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 16 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -379,11 +379,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 16 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -393,10 +393,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 16 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -410,7 +410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -420,11 +420,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 16 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -437,7 +437,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -451,7 +451,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -461,10 +461,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 16 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -478,7 +478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -492,7 +492,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      helas_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -508,7 +508,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -521,7 +521,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -534,7 +534,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -547,22 +547,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 16 ***
 
       // Wavefunction(s) for diagram number 16
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
diff --git a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h
index 5efae129bb..3663a394da 100644
--- a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h
@@ -1275,136 +1275,263 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1_0 linker_VVV1_0
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVVV1P0_1 linker_VVVV1P0_1
-#define helas_VVVV3P0_1 linker_VVVV3P0_1
-#define helas_VVVV4P0_1 linker_VVVV4P0_1
+#define helas_CD_VVV1_0 linker_CD_VVV1_0
+#define helas_CI_VVV1_0 linker_CI_VVV1_0
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVVV1P0_1 linker_CD_VVVV1P0_1
+#define helas_CI_VVVV1P0_1 linker_CI_VVVV1P0_1
+#define helas_CD_VVVV3P0_1 linker_CD_VVVV3P0_1
+#define helas_CI_VVVV3P0_1 linker_CI_VVVV3P0_1
+#define helas_CD_VVVV4P0_1 linker_CD_VVVV4P0_1
+#define helas_CI_VVVV4P0_1 linker_CI_VVVV4P0_1
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CD_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
+  linker_CD_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
index a38fcc455a..bb9ce0f548 100644
--- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005728244781494141 [0m
+[1;32mDEBUG: model prefixing  takes 0.0057353973388671875 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.022 s
+1 processes with 16 diagrams generated in 0.023 s
 Total: 1 processes with 16 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg
 Load PLUGIN.CUDACPP_OUTPUT
@@ -185,7 +185,7 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.334 s
+ALOHA: aloha creates 5 routines in  0.340 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -207,7 +207,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
 quit
 
-real	0m0.840s
-user	0m0.736s
-sys	0m0.067s
+real	0m0.812s
+user	0m0.751s
+sys	0m0.056s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/HelAmps.cc
index fbad4a8555..7383a7f24f 100644
--- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/HelAmps.cc
@@ -62,140 +62,276 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+    return VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CD_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
+  linker_CD_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc
index 23621b7b68..d8876e204b 100644
--- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc
@@ -334,11 +334,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -350,10 +350,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 16 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -363,10 +363,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 16 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -376,11 +376,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 16 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -389,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 16 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -405,7 +405,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -414,11 +414,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 16 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -430,7 +430,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -443,7 +443,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -452,10 +452,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 16 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -468,7 +468,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -481,7 +481,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      helas_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -496,7 +496,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -508,7 +508,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -520,7 +520,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -532,12 +532,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 16 ***
 
       // Wavefunction(s) for diagram number 16
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -545,7 +545,7 @@ namespace mg5amcCpu
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -553,7 +553,7 @@ namespace mg5amcCpu
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h
index 5efae129bb..3663a394da 100644
--- a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h
@@ -1275,136 +1275,263 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1_0 linker_VVV1_0
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVVV1P0_1 linker_VVVV1P0_1
-#define helas_VVVV3P0_1 linker_VVVV3P0_1
-#define helas_VVVV4P0_1 linker_VVVV4P0_1
+#define helas_CD_VVV1_0 linker_CD_VVV1_0
+#define helas_CI_VVV1_0 linker_CI_VVV1_0
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVVV1P0_1 linker_CD_VVVV1P0_1
+#define helas_CI_VVVV1P0_1 linker_CI_VVVV1P0_1
+#define helas_CD_VVVV3P0_1 linker_CD_VVVV3P0_1
+#define helas_CI_VVVV3P0_1 linker_CI_VVVV3P0_1
+#define helas_CD_VVVV4P0_1 linker_CD_VVVV4P0_1
+#define helas_CI_VVVV4P0_1 linker_CI_VVVV4P0_1
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CD_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
+  linker_CD_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
index e3adcfcb3c..040b2ee799 100644
--- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005609273910522461 [0m
+[1;32mDEBUG: model prefixing  takes 0.005586862564086914 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.165 s
+1 processes with 123 diagrams generated in 0.162 s
 Total: 1 processes with 123 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ g g @1 
 INFO: Creating files in directory P1_gg_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f5273b8fe50> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd3cad1ee50> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -194,25 +194,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s
-Wrote files for 222 helas calls in 0.733 s
+Wrote files for 222 helas calls in 0.731 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.352 s
+ALOHA: aloha creates 5 routines in  0.344 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.330 s
+ALOHA: aloha creates 10 routines in  0.323 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -258,9 +258,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m4.003s
-user	0m3.676s
-sys	0m0.300s
+real	0m3.936s
+user	0m3.657s
+sys	0m0.271s
 Code generation completed in 4 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/HelAmps.cc
index 845cf9fd87..ebe42b3ce3 100644
--- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/HelAmps.cc
@@ -62,185 +62,366 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+    return VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
-    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV3_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
-    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
   __device__ void
-  linker_VVVV4_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allV4[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+  linker_CI_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc
index 2a065101ff..c2c2e28d54 100644
--- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc
@@ -336,11 +336,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 1
-      helas_VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -349,7 +349,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -358,7 +358,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -371,10 +371,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 123 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -391,10 +391,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 123 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -411,10 +411,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 123 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 4
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -431,11 +431,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 123 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -449,7 +449,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -462,10 +462,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 123 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -476,10 +476,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 123 ***
 
       // Wavefunction(s) for diagram number 8
-      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -493,7 +493,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -506,10 +506,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 123 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -520,10 +520,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 123 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -537,7 +537,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -553,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -567,7 +567,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -583,7 +583,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -599,7 +599,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -612,12 +612,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 123 ***
 
       // Wavefunction(s) for diagram number 17
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 17
-      helas_FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -627,10 +627,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 18 OF 123 ***
 
       // Wavefunction(s) for diagram number 18
-      helas_FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 18
-      helas_FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -643,7 +643,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 19
-      helas_FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -654,11 +654,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 20 OF 123 ***
 
       // Wavefunction(s) for diagram number 20
-      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 20
-      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -674,7 +674,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 21
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -688,7 +688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -699,10 +699,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 23 OF 123 ***
 
       // Wavefunction(s) for diagram number 23
-      helas_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
 
       // Amplitude(s) for diagram number 23
-      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -718,7 +718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 24
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -732,7 +732,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -743,10 +743,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 123 ***
 
       // Wavefunction(s) for diagram number 26
-      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -759,7 +759,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -772,7 +772,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 28
-      helas_FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -785,7 +785,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -798,7 +798,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      helas_FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -812,7 +812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 31
-      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -825,22 +825,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 123 ***
 
       // Wavefunction(s) for diagram number 32
-      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
-      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
-      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
+      helas_CD_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -849,12 +849,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 33 OF 123 ***
 
       // Wavefunction(s) for diagram number 33
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      helas_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -864,10 +864,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 34 OF 123 ***
 
       // Wavefunction(s) for diagram number 34
-      helas_FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 34
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -880,7 +880,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -891,10 +891,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 36 OF 123 ***
 
       // Wavefunction(s) for diagram number 36
-      helas_FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
+      helas_CD_FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 36
-      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -910,7 +910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 37
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -924,7 +924,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -938,7 +938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -954,7 +954,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      helas_FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -968,7 +968,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 41
-      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -979,10 +979,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 42 OF 123 ***
 
       // Wavefunction(s) for diagram number 42
-      helas_FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 42
-      helas_FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -995,7 +995,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 43
-      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1008,7 +1008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 44
-      helas_FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1021,7 +1021,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      helas_FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1034,7 +1034,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      helas_FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1048,7 +1048,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 47
-      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1064,17 +1064,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -1083,11 +1083,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 49 OF 123 ***
 
       // Wavefunction(s) for diagram number 49
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
-      helas_FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 49
-      helas_FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1098,10 +1098,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 50 OF 123 ***
 
       // Wavefunction(s) for diagram number 50
-      helas_VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 50
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1117,7 +1117,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 51
-      helas_FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1128,10 +1128,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 52 OF 123 ***
 
       // Wavefunction(s) for diagram number 52
-      helas_FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 52
-      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1145,7 +1145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1161,7 +1161,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 54
-      helas_FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1175,7 +1175,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 55
-      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1191,7 +1191,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 56
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1207,7 +1207,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      helas_VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1227,7 +1227,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 58
-      helas_VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1236,7 +1236,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1245,7 +1245,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1258,10 +1258,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 123 ***
 
       // Wavefunction(s) for diagram number 59
-      helas_VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 59
-      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1281,7 +1281,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 60
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1301,7 +1301,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1317,7 +1317,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1331,7 +1331,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 63
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1347,7 +1347,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1358,11 +1358,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 65 OF 123 ***
 
       // Wavefunction(s) for diagram number 65
-      helas_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
-      helas_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 65
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1373,10 +1373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 66 OF 123 ***
 
       // Wavefunction(s) for diagram number 66
-      helas_VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 66
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1392,7 +1392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1403,10 +1403,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 68 OF 123 ***
 
       // Wavefunction(s) for diagram number 68
-      helas_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 68
-      helas_FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1420,7 +1420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1436,7 +1436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      helas_FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1450,7 +1450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1466,7 +1466,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1482,7 +1482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 73
-      helas_VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1502,7 +1502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 74
-      helas_VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1511,7 +1511,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1520,7 +1520,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1533,10 +1533,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 75 OF 123 ***
 
       // Wavefunction(s) for diagram number 75
-      helas_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_CD_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 75
-      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1556,7 +1556,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 76
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1576,7 +1576,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 77
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1592,7 +1592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 78
-      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1606,7 +1606,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 79
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1622,7 +1622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 80
-      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 80 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1633,10 +1633,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 81 OF 123 ***
 
       // Wavefunction(s) for diagram number 81
-      helas_FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 81
-      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1646,10 +1646,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 82 OF 123 ***
 
       // Wavefunction(s) for diagram number 82
-      helas_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 82
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1662,7 +1662,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 83
-      helas_FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1672,10 +1672,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 84 OF 123 ***
 
       // Wavefunction(s) for diagram number 84
-      helas_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 84
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1688,7 +1688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 85
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1699,10 +1699,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 86 OF 123 ***
 
       // Wavefunction(s) for diagram number 86
-      helas_VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 86
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1715,10 +1715,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 87 OF 123 ***
 
       // Wavefunction(s) for diagram number 87
-      helas_FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_CD_FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 87
-      helas_FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1728,10 +1728,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 88 OF 123 ***
 
       // Wavefunction(s) for diagram number 88
-      helas_FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 88
-      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1744,7 +1744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 89
-      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1754,10 +1754,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 90 OF 123 ***
 
       // Wavefunction(s) for diagram number 90
-      helas_FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
+      helas_CD_FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
 
       // Amplitude(s) for diagram number 90
-      helas_FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 90 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1770,7 +1770,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 91
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1784,7 +1784,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 92
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1800,7 +1800,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 93
-      helas_VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1809,7 +1809,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1818,7 +1818,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1831,10 +1831,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 94 OF 123 ***
 
       // Wavefunction(s) for diagram number 94
-      helas_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 94
-      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1851,10 +1851,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 95 OF 123 ***
 
       // Wavefunction(s) for diagram number 95
-      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 95
-      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1874,7 +1874,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 96
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1890,7 +1890,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 97
-      helas_FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1904,7 +1904,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 98
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1920,7 +1920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 99
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1934,7 +1934,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 100
-      helas_VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1943,7 +1943,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1952,7 +1952,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1965,10 +1965,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 101 OF 123 ***
 
       // Wavefunction(s) for diagram number 101
-      helas_VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 101
-      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1988,7 +1988,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 102
-      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2008,7 +2008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 103
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2024,7 +2024,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 104
-      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2038,7 +2038,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 105
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2054,7 +2054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 106
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2068,7 +2068,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 107
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2077,7 +2077,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2086,7 +2086,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2102,7 +2102,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 108
-      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2122,7 +2122,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 109
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2142,7 +2142,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 110
-      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2155,7 +2155,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 111
-      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2168,7 +2168,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 112
-      helas_FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2181,7 +2181,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 113
-      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2191,12 +2191,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 114 OF 123 ***
 
       // Wavefunction(s) for diagram number 114
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 114
-      helas_VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2205,7 +2205,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2214,7 +2214,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2230,17 +2230,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 115
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -2252,17 +2252,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 116
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -2271,12 +2271,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 117 OF 123 ***
 
       // Wavefunction(s) for diagram number 117
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 117
-      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2285,7 +2285,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2294,7 +2294,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2310,17 +2310,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 118
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -2332,17 +2332,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 119
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -2351,22 +2351,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 120 OF 123 ***
 
       // Wavefunction(s) for diagram number 120
-      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
 
       // Amplitude(s) for diagram number 120
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -2378,17 +2378,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 121
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -2400,7 +2400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 122
-      helas_VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2409,7 +2409,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2418,7 +2418,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2434,7 +2434,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 123
-      helas_VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2443,7 +2443,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2452,7 +2452,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
diff --git a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h
index 24e8114e3a..624de4a7b3 100644
--- a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h
@@ -1404,178 +1404,347 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1_0 linker_VVV1_0
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVVV1_0 linker_VVVV1_0
-#define helas_VVVV1P0_1 linker_VVVV1P0_1
-#define helas_VVVV3_0 linker_VVVV3_0
-#define helas_VVVV3P0_1 linker_VVVV3P0_1
-#define helas_VVVV4_0 linker_VVVV4_0
-#define helas_VVVV4P0_1 linker_VVVV4P0_1
+#define helas_CD_VVV1_0 linker_CD_VVV1_0
+#define helas_CI_VVV1_0 linker_CI_VVV1_0
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVVV1_0 linker_CD_VVVV1_0
+#define helas_CI_VVVV1_0 linker_CI_VVVV1_0
+#define helas_CD_VVVV1P0_1 linker_CD_VVVV1P0_1
+#define helas_CI_VVVV1P0_1 linker_CI_VVVV1P0_1
+#define helas_CD_VVVV3_0 linker_CD_VVVV3_0
+#define helas_CI_VVVV3_0 linker_CI_VVVV3_0
+#define helas_CD_VVVV3P0_1 linker_CD_VVVV3P0_1
+#define helas_CI_VVVV3P0_1 linker_CI_VVVV3P0_1
+#define helas_CD_VVVV4_0 linker_CD_VVVV4_0
+#define helas_CI_VVVV4_0 linker_CI_VVVV4_0
+#define helas_CD_VVVV4P0_1 linker_CD_VVVV4P0_1
+#define helas_CI_VVVV4P0_1 linker_CI_VVVV4P0_1
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV3_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_VVVV4_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allV4[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
index 1b8b96727c..5a80864c2c 100644
--- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005701541900634766 [0m
+[1;32mDEBUG: model prefixing  takes 0.0056858062744140625 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.164 s
+1 processes with 123 diagrams generated in 0.163 s
 Total: 1 processes with 123 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg
 Load PLUGIN.CUDACPP_OUTPUT
@@ -185,7 +185,7 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.329 s
+ALOHA: aloha creates 5 routines in  0.333 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -210,7 +210,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
 quit
 
-real	0m1.496s
-user	0m1.443s
-sys	0m0.044s
-Code generation completed in 2 seconds
+real	0m1.499s
+user	0m1.430s
+sys	0m0.060s
+Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/HelAmps.cc
index 845cf9fd87..ebe42b3ce3 100644
--- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/HelAmps.cc
@@ -62,185 +62,366 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+    return VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
-    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV3_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
-    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
   __device__ void
-  linker_VVVV4_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allV4[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+  linker_CI_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc
index a7cc5471b8..36d67dc897 100644
--- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc
@@ -336,11 +336,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 1
-      helas_VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -352,7 +352,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -364,7 +364,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -380,10 +380,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 123 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -399,10 +399,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 123 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -418,10 +418,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 123 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 4
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -437,11 +437,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 123 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -454,7 +454,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -466,10 +466,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 123 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -479,10 +479,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 123 ***
 
       // Wavefunction(s) for diagram number 8
-      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -495,7 +495,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -507,10 +507,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 123 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -520,10 +520,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 123 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -536,7 +536,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -551,7 +551,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -564,7 +564,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -579,7 +579,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -594,7 +594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -606,12 +606,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 123 ***
 
       // Wavefunction(s) for diagram number 17
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 17
-      helas_FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -620,10 +620,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 18 OF 123 ***
 
       // Wavefunction(s) for diagram number 18
-      helas_FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 18
-      helas_FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -635,7 +635,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 19
-      helas_FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -645,11 +645,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 20 OF 123 ***
 
       // Wavefunction(s) for diagram number 20
-      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 20
-      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -664,7 +664,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 21
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -677,7 +677,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -687,10 +687,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 23 OF 123 ***
 
       // Wavefunction(s) for diagram number 23
-      helas_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
 
       // Amplitude(s) for diagram number 23
-      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -705,7 +705,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 24
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -718,7 +718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -728,10 +728,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 123 ***
 
       // Wavefunction(s) for diagram number 26
-      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -743,7 +743,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -755,7 +755,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 28
-      helas_FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -767,7 +767,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -779,7 +779,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      helas_FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -792,7 +792,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 31
-      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -804,12 +804,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 123 ***
 
       // Wavefunction(s) for diagram number 32
-      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
-      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
-      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
+      helas_CD_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -817,7 +817,7 @@ namespace mg5amcCpu
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -825,7 +825,7 @@ namespace mg5amcCpu
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -837,12 +837,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 33 OF 123 ***
 
       // Wavefunction(s) for diagram number 33
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      helas_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -851,10 +851,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 34 OF 123 ***
 
       // Wavefunction(s) for diagram number 34
-      helas_FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 34
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -866,7 +866,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -876,10 +876,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 36 OF 123 ***
 
       // Wavefunction(s) for diagram number 36
-      helas_FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
+      helas_CD_FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 36
-      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -894,7 +894,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 37
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -907,7 +907,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -920,7 +920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -935,7 +935,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      helas_FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -948,7 +948,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 41
-      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -958,10 +958,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 42 OF 123 ***
 
       // Wavefunction(s) for diagram number 42
-      helas_FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 42
-      helas_FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -973,7 +973,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 43
-      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -985,7 +985,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 44
-      helas_FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -997,7 +997,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      helas_FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1009,7 +1009,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      helas_FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1022,7 +1022,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 47
-      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1037,7 +1037,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1045,7 +1045,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1053,7 +1053,7 @@ namespace mg5amcCpu
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1065,11 +1065,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 49 OF 123 ***
 
       // Wavefunction(s) for diagram number 49
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
-      helas_FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 49
-      helas_FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1079,10 +1079,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 50 OF 123 ***
 
       // Wavefunction(s) for diagram number 50
-      helas_VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 50
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1097,7 +1097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 51
-      helas_FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1107,10 +1107,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 52 OF 123 ***
 
       // Wavefunction(s) for diagram number 52
-      helas_FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 52
-      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1123,7 +1123,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1138,7 +1138,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 54
-      helas_FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1151,7 +1151,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 55
-      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1166,7 +1166,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 56
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1181,7 +1181,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      helas_VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1200,7 +1200,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 58
-      helas_VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1212,7 +1212,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1224,7 +1224,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1240,10 +1240,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 123 ***
 
       // Wavefunction(s) for diagram number 59
-      helas_VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 59
-      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1262,7 +1262,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 60
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1281,7 +1281,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1296,7 +1296,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1309,7 +1309,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 63
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1324,7 +1324,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1334,11 +1334,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 65 OF 123 ***
 
       // Wavefunction(s) for diagram number 65
-      helas_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
-      helas_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 65
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1348,10 +1348,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 66 OF 123 ***
 
       // Wavefunction(s) for diagram number 66
-      helas_VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 66
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1366,7 +1366,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1376,10 +1376,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 68 OF 123 ***
 
       // Wavefunction(s) for diagram number 68
-      helas_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 68
-      helas_FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1392,7 +1392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1407,7 +1407,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      helas_FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1420,7 +1420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1435,7 +1435,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1450,7 +1450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 73
-      helas_VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1469,7 +1469,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 74
-      helas_VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1481,7 +1481,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1493,7 +1493,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1509,10 +1509,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 75 OF 123 ***
 
       // Wavefunction(s) for diagram number 75
-      helas_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_CD_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 75
-      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1531,7 +1531,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 76
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1550,7 +1550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 77
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1565,7 +1565,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 78
-      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1578,7 +1578,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 79
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1593,7 +1593,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 80
-      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1603,10 +1603,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 81 OF 123 ***
 
       // Wavefunction(s) for diagram number 81
-      helas_FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 81
-      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1615,10 +1615,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 82 OF 123 ***
 
       // Wavefunction(s) for diagram number 82
-      helas_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 82
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1630,7 +1630,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 83
-      helas_FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1639,10 +1639,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 84 OF 123 ***
 
       // Wavefunction(s) for diagram number 84
-      helas_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 84
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1654,7 +1654,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 85
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1664,10 +1664,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 86 OF 123 ***
 
       // Wavefunction(s) for diagram number 86
-      helas_VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 86
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1679,10 +1679,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 87 OF 123 ***
 
       // Wavefunction(s) for diagram number 87
-      helas_FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_CD_FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 87
-      helas_FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1691,10 +1691,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 88 OF 123 ***
 
       // Wavefunction(s) for diagram number 88
-      helas_FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 88
-      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1706,7 +1706,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 89
-      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1715,10 +1715,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 90 OF 123 ***
 
       // Wavefunction(s) for diagram number 90
-      helas_FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
+      helas_CD_FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
 
       // Amplitude(s) for diagram number 90
-      helas_FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1730,7 +1730,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 91
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1743,7 +1743,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 92
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1758,7 +1758,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 93
-      helas_VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1770,7 +1770,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1782,7 +1782,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1798,10 +1798,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 94 OF 123 ***
 
       // Wavefunction(s) for diagram number 94
-      helas_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 94
-      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1817,10 +1817,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 95 OF 123 ***
 
       // Wavefunction(s) for diagram number 95
-      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 95
-      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1839,7 +1839,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 96
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1854,7 +1854,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 97
-      helas_FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1867,7 +1867,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 98
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1882,7 +1882,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 99
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1895,7 +1895,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 100
-      helas_VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1907,7 +1907,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1919,7 +1919,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1935,10 +1935,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 101 OF 123 ***
 
       // Wavefunction(s) for diagram number 101
-      helas_VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 101
-      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1957,7 +1957,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 102
-      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1976,7 +1976,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 103
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1991,7 +1991,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 104
-      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2004,7 +2004,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 105
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2019,7 +2019,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 106
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2032,7 +2032,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 107
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2044,7 +2044,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2056,7 +2056,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2075,7 +2075,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 108
-      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2094,7 +2094,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 109
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2113,7 +2113,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 110
-      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2125,7 +2125,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 111
-      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2137,7 +2137,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 112
-      helas_FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2149,7 +2149,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 113
-      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2158,12 +2158,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 114 OF 123 ***
 
       // Wavefunction(s) for diagram number 114
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 114
-      helas_VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2175,7 +2175,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2187,7 +2187,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2206,7 +2206,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 115
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2214,7 +2214,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2222,7 +2222,7 @@ namespace mg5amcCpu
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2237,7 +2237,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 116
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2245,7 +2245,7 @@ namespace mg5amcCpu
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2253,7 +2253,7 @@ namespace mg5amcCpu
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2265,12 +2265,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 117 OF 123 ***
 
       // Wavefunction(s) for diagram number 117
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 117
-      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2282,7 +2282,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2294,7 +2294,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2313,7 +2313,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 118
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2321,7 +2321,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2329,7 +2329,7 @@ namespace mg5amcCpu
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2344,7 +2344,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 119
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2352,7 +2352,7 @@ namespace mg5amcCpu
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2360,7 +2360,7 @@ namespace mg5amcCpu
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2372,12 +2372,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 120 OF 123 ***
 
       // Wavefunction(s) for diagram number 120
-      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
 
       // Amplitude(s) for diagram number 120
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2385,7 +2385,7 @@ namespace mg5amcCpu
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2393,7 +2393,7 @@ namespace mg5amcCpu
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2408,7 +2408,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 121
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2416,7 +2416,7 @@ namespace mg5amcCpu
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2424,7 +2424,7 @@ namespace mg5amcCpu
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2439,7 +2439,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 122
-      helas_VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2451,7 +2451,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2463,7 +2463,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2482,7 +2482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 123
-      helas_VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2494,7 +2494,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2506,7 +2506,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h
index 24e8114e3a..624de4a7b3 100644
--- a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h
@@ -1404,178 +1404,347 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1_0 linker_VVV1_0
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVVV1_0 linker_VVVV1_0
-#define helas_VVVV1P0_1 linker_VVVV1P0_1
-#define helas_VVVV3_0 linker_VVVV3_0
-#define helas_VVVV3P0_1 linker_VVVV3P0_1
-#define helas_VVVV4_0 linker_VVVV4_0
-#define helas_VVVV4P0_1 linker_VVVV4P0_1
+#define helas_CD_VVV1_0 linker_CD_VVV1_0
+#define helas_CI_VVV1_0 linker_CI_VVV1_0
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVVV1_0 linker_CD_VVVV1_0
+#define helas_CI_VVVV1_0 linker_CI_VVVV1_0
+#define helas_CD_VVVV1P0_1 linker_CD_VVVV1P0_1
+#define helas_CI_VVVV1P0_1 linker_CI_VVVV1P0_1
+#define helas_CD_VVVV3_0 linker_CD_VVVV3_0
+#define helas_CI_VVVV3_0 linker_CI_VVVV3_0
+#define helas_CD_VVVV3P0_1 linker_CD_VVVV3P0_1
+#define helas_CI_VVVV3P0_1 linker_CI_VVVV3P0_1
+#define helas_CD_VVVV4_0 linker_CD_VVVV4_0
+#define helas_CI_VVVV4_0 linker_CI_VVVV4_0
+#define helas_CD_VVVV4P0_1 linker_CD_VVVV4P0_1
+#define helas_CI_VVVV4P0_1 linker_CI_VVVV4P0_1
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV3_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_VVVV4_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allV4[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
index 9668c060d9..71e8a6eff9 100644
--- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005650043487548828 [0m
+[1;32mDEBUG: model prefixing  takes 0.005856752395629883 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.961 s
+1 processes with 1240 diagrams generated in 1.946 s
 Total: 1 processes with 1240 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -179,8 +179,8 @@ INFO: Processing color information for process: g g > t t~ g g g @1
 INFO: Creating files in directory P1_gg_ttxggg 
 INFO: Computing Color-Flow optimization [15120 term] 
 INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f628bf6bc10> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fb37f02bc10> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -196,25 +196,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1614][0m [0m
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.828 s
-Wrote files for 2281 helas calls in 19.178 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1626][0m [0m
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.788 s
+Wrote files for 2281 helas calls in 19.161 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.334 s
+ALOHA: aloha creates 5 routines in  0.333 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.373 s
+ALOHA: aloha creates 10 routines in  0.379 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -260,9 +260,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m33.910s
-user	0m33.348s
-sys	0m0.451s
+real	0m33.818s
+user	0m33.213s
+sys	0m0.496s
 Code generation completed in 34 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/HelAmps.cc
index 845cf9fd87..ebe42b3ce3 100644
--- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/HelAmps.cc
@@ -62,185 +62,366 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+    return VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
-    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV3_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
-    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
   __device__ void
-  linker_VVVV4_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allV4[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+  linker_CI_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc
index 30587548a9..e1180e139d 100644
--- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc
@@ -338,13 +338,13 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][6], +1, w_fp[6], 6 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
-      helas_VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] );
-      helas_VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 1
-      helas_VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -369,10 +369,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 1240 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_CD_VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 2
-      helas_VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -400,7 +400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 3
-      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -417,7 +417,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -434,7 +434,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -455,11 +455,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 1240 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] );
-      helas_VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_CD_VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] );
 
       // Amplitude(s) for diagram number 4
-      helas_VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -487,7 +487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -515,7 +515,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -532,7 +532,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -549,7 +549,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -570,10 +570,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 1240 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 7
-      helas_VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -601,7 +601,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -629,7 +629,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -646,7 +646,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -663,7 +663,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -684,12 +684,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 1240 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
-      helas_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] );
-      helas_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
+      helas_CD_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 10
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -706,7 +706,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -723,7 +723,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -744,12 +744,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 1240 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] );
-      helas_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] );
-      helas_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] );
+      helas_CD_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] );
+      helas_CD_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] );
+      helas_CD_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 11
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -766,7 +766,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -783,7 +783,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -804,12 +804,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 1240 ***
 
       // Wavefunction(s) for diagram number 12
-      helas_VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      helas_VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      helas_VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 12
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -826,7 +826,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -843,7 +843,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -864,10 +864,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 1240 ***
 
       // Wavefunction(s) for diagram number 13
-      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 13
-      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
@@ -884,7 +884,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -901,7 +901,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
@@ -922,10 +922,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 1240 ***
 
       // Wavefunction(s) for diagram number 14
-      helas_VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 14
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -950,10 +950,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 15 OF 1240 ***
 
       // Wavefunction(s) for diagram number 15
-      helas_VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] );
 
       // Amplitude(s) for diagram number 15
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -981,7 +981,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1006,10 +1006,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 1240 ***
 
       // Wavefunction(s) for diagram number 17
-      helas_VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] );
 
       // Amplitude(s) for diagram number 17
-      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
@@ -1026,7 +1026,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -1043,7 +1043,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
@@ -1067,7 +1067,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1092,10 +1092,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 1240 ***
 
       // Wavefunction(s) for diagram number 19
-      helas_VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] );
 
       // Amplitude(s) for diagram number 19
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1123,7 +1123,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1148,10 +1148,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 1240 ***
 
       // Wavefunction(s) for diagram number 21
-      helas_VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_CD_VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
 
       // Amplitude(s) for diagram number 21
-      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -1168,7 +1168,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -1185,7 +1185,7 @@ namespace mg5amcCpu
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -1209,7 +1209,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1237,7 +1237,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1262,10 +1262,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 1240 ***
 
       // Wavefunction(s) for diagram number 24
-      helas_VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 24
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1290,12 +1290,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 25 OF 1240 ***
 
       // Wavefunction(s) for diagram number 25
-      helas_VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] );
-      helas_VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] );
-      helas_VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] );
+      helas_CD_VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] );
+      helas_CD_VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] );
+      helas_CD_VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] );
 
       // Amplitude(s) for diagram number 25
-      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -1312,7 +1312,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -1329,7 +1329,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -1350,12 +1350,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 1240 ***
 
       // Wavefunction(s) for diagram number 26
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] );
-      helas_FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      helas_FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] );
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1366,10 +1366,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 1240 ***
 
       // Wavefunction(s) for diagram number 27
-      helas_FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] );
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1380,10 +1380,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 1240 ***
 
       // Wavefunction(s) for diagram number 28
-      helas_FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] );
 
       // Amplitude(s) for diagram number 28
-      helas_VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1403,7 +1403,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1419,7 +1419,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      helas_VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1439,7 +1439,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 31
-      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1455,7 +1455,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1464,7 +1464,7 @@ namespace mg5amcCpu
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1473,7 +1473,7 @@ namespace mg5amcCpu
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1486,11 +1486,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 33 OF 1240 ***
 
       // Wavefunction(s) for diagram number 33
-      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] );
-      helas_FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1501,10 +1501,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 34 OF 1240 ***
 
       // Wavefunction(s) for diagram number 34
-      helas_FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
 
       // Amplitude(s) for diagram number 34
-      helas_FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1518,7 +1518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1531,10 +1531,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 36 OF 1240 ***
 
       // Wavefunction(s) for diagram number 36
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] );
 
       // Amplitude(s) for diagram number 36
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1545,10 +1545,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 37 OF 1240 ***
 
       // Wavefunction(s) for diagram number 37
-      helas_FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] );
 
       // Amplitude(s) for diagram number 37
-      helas_FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1562,7 +1562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1578,7 +1578,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1594,7 +1594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      helas_FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1610,7 +1610,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 41
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1627,11 +1627,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 42 OF 1240 ***
 
       // Wavefunction(s) for diagram number 42
-      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
-      helas_FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] );
 
       // Amplitude(s) for diagram number 42
-      helas_FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1642,10 +1642,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 43 OF 1240 ***
 
       // Wavefunction(s) for diagram number 43
-      helas_FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] );
 
       // Amplitude(s) for diagram number 43
-      helas_FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1656,10 +1656,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 44 OF 1240 ***
 
       // Wavefunction(s) for diagram number 44
-      helas_FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] );
 
       // Amplitude(s) for diagram number 44
-      helas_VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1679,7 +1679,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1695,7 +1695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      helas_VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1715,7 +1715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 47
-      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1731,7 +1731,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1740,7 +1740,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1749,7 +1749,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1762,11 +1762,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 49 OF 1240 ***
 
       // Wavefunction(s) for diagram number 49
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] );
-      helas_FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
 
       // Amplitude(s) for diagram number 49
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1777,10 +1777,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 50 OF 1240 ***
 
       // Wavefunction(s) for diagram number 50
-      helas_FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
 
       // Amplitude(s) for diagram number 50
-      helas_FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1794,7 +1794,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 51
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1810,7 +1810,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 52
-      helas_FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1824,7 +1824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      helas_FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1838,7 +1838,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 54
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1854,7 +1854,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 55
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1870,7 +1870,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 56
-      helas_FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1886,7 +1886,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1903,11 +1903,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 58 OF 1240 ***
 
       // Wavefunction(s) for diagram number 58
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
-      helas_FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] );
 
       // Amplitude(s) for diagram number 58
-      helas_FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 58 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1918,10 +1918,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 1240 ***
 
       // Wavefunction(s) for diagram number 59
-      helas_FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] );
 
       // Amplitude(s) for diagram number 59
-      helas_FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1932,10 +1932,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 60 OF 1240 ***
 
       // Wavefunction(s) for diagram number 60
-      helas_FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] );
 
       // Amplitude(s) for diagram number 60
-      helas_VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1955,7 +1955,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1971,7 +1971,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      helas_VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1991,7 +1991,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 63
-      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2007,7 +2007,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2016,7 +2016,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2025,7 +2025,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2038,10 +2038,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 65 OF 1240 ***
 
       // Wavefunction(s) for diagram number 65
-      helas_FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
 
       // Amplitude(s) for diagram number 65
-      helas_FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2055,7 +2055,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 66
-      helas_FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2069,7 +2069,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2085,7 +2085,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 68
-      helas_FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2099,7 +2099,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      helas_FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2113,7 +2113,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2129,7 +2129,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2145,7 +2145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      helas_FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2161,7 +2161,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 73
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2178,11 +2178,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 74 OF 1240 ***
 
       // Wavefunction(s) for diagram number 74
-      helas_FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
-      helas_FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 74
-      helas_FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 74 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2193,10 +2193,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 75 OF 1240 ***
 
       // Wavefunction(s) for diagram number 75
-      helas_FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] );
 
       // Amplitude(s) for diagram number 75
-      helas_FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2207,10 +2207,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 76 OF 1240 ***
 
       // Wavefunction(s) for diagram number 76
-      helas_FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] );
+      helas_CD_FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] );
 
       // Amplitude(s) for diagram number 76
-      helas_VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2230,7 +2230,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 77
-      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2246,7 +2246,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 78
-      helas_VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2266,7 +2266,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 79
-      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2282,7 +2282,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 80
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2291,7 +2291,7 @@ namespace mg5amcCpu
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2300,7 +2300,7 @@ namespace mg5amcCpu
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2316,7 +2316,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 81
-      helas_FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2332,7 +2332,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 82
-      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2348,7 +2348,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 83
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2365,10 +2365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 84 OF 1240 ***
 
       // Wavefunction(s) for diagram number 84
-      helas_FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] );
 
       // Amplitude(s) for diagram number 84
-      helas_FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2379,10 +2379,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 85 OF 1240 ***
 
       // Wavefunction(s) for diagram number 85
-      helas_FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
 
       // Amplitude(s) for diagram number 85
-      helas_FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2393,10 +2393,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 86 OF 1240 ***
 
       // Wavefunction(s) for diagram number 86
-      helas_FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] );
+      helas_CD_FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 86
-      helas_VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2416,7 +2416,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 87
-      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2432,7 +2432,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 88
-      helas_VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2452,7 +2452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 89
-      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2468,7 +2468,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 90
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2477,7 +2477,7 @@ namespace mg5amcCpu
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2486,7 +2486,7 @@ namespace mg5amcCpu
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2502,7 +2502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 91
-      helas_FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2518,7 +2518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 92
-      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2534,7 +2534,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 93
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 93 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2551,10 +2551,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 94 OF 1240 ***
 
       // Wavefunction(s) for diagram number 94
-      helas_FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] );
 
       // Amplitude(s) for diagram number 94
-      helas_FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2565,10 +2565,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 95 OF 1240 ***
 
       // Wavefunction(s) for diagram number 95
-      helas_FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
 
       // Amplitude(s) for diagram number 95
-      helas_FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2579,10 +2579,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 96 OF 1240 ***
 
       // Wavefunction(s) for diagram number 96
-      helas_FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] );
+      helas_CD_FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 96
-      helas_VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2602,7 +2602,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 97
-      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2618,7 +2618,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 98
-      helas_VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2638,7 +2638,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 99
-      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2654,7 +2654,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 100
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2663,7 +2663,7 @@ namespace mg5amcCpu
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2672,7 +2672,7 @@ namespace mg5amcCpu
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2688,7 +2688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 101
-      helas_FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2704,7 +2704,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 102
-      helas_FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2720,7 +2720,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 103
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2737,10 +2737,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 104 OF 1240 ***
 
       // Wavefunction(s) for diagram number 104
-      helas_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] );
 
       // Amplitude(s) for diagram number 104
-      helas_FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2753,10 +2753,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 105 OF 1240 ***
 
       // Wavefunction(s) for diagram number 105
-      helas_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] );
+      helas_CD_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] );
 
       // Amplitude(s) for diagram number 105
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2773,10 +2773,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 106 OF 1240 ***
 
       // Wavefunction(s) for diagram number 106
-      helas_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
 
       // Amplitude(s) for diagram number 106
-      helas_FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2792,7 +2792,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 107
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 107 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2812,7 +2812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 108
-      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2832,7 +2832,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 109
-      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2849,10 +2849,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 110 OF 1240 ***
 
       // Wavefunction(s) for diagram number 110
-      helas_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 110
-      helas_FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2865,10 +2865,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 111 OF 1240 ***
 
       // Wavefunction(s) for diagram number 111
-      helas_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 111
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2885,10 +2885,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 112 OF 1240 ***
 
       // Wavefunction(s) for diagram number 112
-      helas_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 112
-      helas_FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2904,7 +2904,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 113
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2924,7 +2924,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 114
-      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 114 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2944,7 +2944,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 115
-      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 115 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2961,10 +2961,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 116 OF 1240 ***
 
       // Wavefunction(s) for diagram number 116
-      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 116
-      helas_FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 116 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2977,10 +2977,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 117 OF 1240 ***
 
       // Wavefunction(s) for diagram number 117
-      helas_VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] );
 
       // Amplitude(s) for diagram number 117
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 117 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2997,10 +2997,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 118 OF 1240 ***
 
       // Wavefunction(s) for diagram number 118
-      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] );
 
       // Amplitude(s) for diagram number 118
-      helas_FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 118 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3016,7 +3016,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 119
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 119 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3036,7 +3036,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 120
-      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 120 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3056,7 +3056,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 121
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 121 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3076,7 +3076,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 122
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -3085,7 +3085,7 @@ namespace mg5amcCpu
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -3094,7 +3094,7 @@ namespace mg5amcCpu
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3110,7 +3110,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 123
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
@@ -3119,7 +3119,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3128,7 +3128,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3141,13 +3141,13 @@ namespace mg5amcCpu
       // *** DIAGRAM 124 OF 1240 ***
 
       // Wavefunction(s) for diagram number 124
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
-      helas_FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      helas_FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 124
-      helas_FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 124 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3157,10 +3157,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 125 OF 1240 ***
 
       // Wavefunction(s) for diagram number 125
-      helas_FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 125
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 125 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3170,11 +3170,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 126 OF 1240 ***
 
       // Wavefunction(s) for diagram number 126
-      helas_FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] );
-      helas_FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] );
 
       // Amplitude(s) for diagram number 126
-      helas_FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 126 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3187,7 +3187,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 127
-      helas_FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 127 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3197,10 +3197,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 128 OF 1240 ***
 
       // Wavefunction(s) for diagram number 128
-      helas_FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] );
 
       // Amplitude(s) for diagram number 128
-      helas_FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 128 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3213,7 +3213,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 129
-      helas_FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 129 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3223,10 +3223,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 130 OF 1240 ***
 
       // Wavefunction(s) for diagram number 130
-      helas_FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] );
+      helas_CD_FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] );
 
       // Amplitude(s) for diagram number 130
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 130 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3239,10 +3239,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 131 OF 1240 ***
 
       // Wavefunction(s) for diagram number 131
-      helas_FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
 
       // Amplitude(s) for diagram number 131
-      helas_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 131 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3256,7 +3256,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 132
-      helas_FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 132 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3270,7 +3270,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 133
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 133 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3283,10 +3283,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 134 OF 1240 ***
 
       // Wavefunction(s) for diagram number 134
-      helas_FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 134
-      helas_FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 134 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3300,7 +3300,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 135
-      helas_FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 135 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3314,7 +3314,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 136
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 136 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3330,7 +3330,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 137
-      helas_FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 137 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3341,10 +3341,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 138 OF 1240 ***
 
       // Wavefunction(s) for diagram number 138
-      helas_FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
 
       // Amplitude(s) for diagram number 138
-      helas_FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 138 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3358,17 +3358,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 139
-      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -3377,12 +3377,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 140 OF 1240 ***
 
       // Wavefunction(s) for diagram number 140
-      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] );
-      helas_FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] );
-      helas_VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] );
+      helas_CD_VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] );
 
       // Amplitude(s) for diagram number 140
-      helas_VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 140 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3399,10 +3399,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 141 OF 1240 ***
 
       // Wavefunction(s) for diagram number 141
-      helas_VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] );
+      helas_CD_VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] );
 
       // Amplitude(s) for diagram number 141
-      helas_VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 141 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3422,7 +3422,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 142
-      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -3431,7 +3431,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -3440,7 +3440,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3453,10 +3453,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 143 OF 1240 ***
 
       // Wavefunction(s) for diagram number 143
-      helas_FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] );
 
       // Amplitude(s) for diagram number 143
-      helas_FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 143 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3470,7 +3470,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 144
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 144 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3486,7 +3486,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 145
-      helas_FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 145 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3500,7 +3500,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 146
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 146 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3513,10 +3513,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 147 OF 1240 ***
 
       // Wavefunction(s) for diagram number 147
-      helas_FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
 
       // Amplitude(s) for diagram number 147
-      helas_FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 147 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3527,10 +3527,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 148 OF 1240 ***
 
       // Wavefunction(s) for diagram number 148
-      helas_FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] );
+      helas_CD_FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] );
 
       // Amplitude(s) for diagram number 148
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 148 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3546,7 +3546,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 149
-      helas_FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 149 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3560,7 +3560,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 150
-      helas_FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 150 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3571,10 +3571,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 151 OF 1240 ***
 
       // Wavefunction(s) for diagram number 151
-      helas_FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] );
+      helas_CD_FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 151
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 151 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3590,7 +3590,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 152
-      helas_FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 152 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3604,7 +3604,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 153
-      helas_FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 153 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3620,7 +3620,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 154
-      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 154 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3640,7 +3640,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 155
-      helas_FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 155 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3653,11 +3653,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 156 OF 1240 ***
 
       // Wavefunction(s) for diagram number 156
-      helas_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] );
-      helas_VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] );
+      helas_CD_VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] );
 
       // Amplitude(s) for diagram number 156
-      helas_VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 156 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3674,10 +3674,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 157 OF 1240 ***
 
       // Wavefunction(s) for diagram number 157
-      helas_VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] );
+      helas_CD_VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] );
 
       // Amplitude(s) for diagram number 157
-      helas_VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 157 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3697,7 +3697,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 158
-      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3706,7 +3706,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3715,7 +3715,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3728,10 +3728,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 159 OF 1240 ***
 
       // Wavefunction(s) for diagram number 159
-      helas_FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
 
       // Amplitude(s) for diagram number 159
-      helas_FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 159 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3745,7 +3745,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 160
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 160 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3761,7 +3761,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 161
-      helas_FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 161 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3775,7 +3775,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 162
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 162 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3788,10 +3788,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 163 OF 1240 ***
 
       // Wavefunction(s) for diagram number 163
-      helas_FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
 
       // Amplitude(s) for diagram number 163
-      helas_FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 163 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3802,10 +3802,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 164 OF 1240 ***
 
       // Wavefunction(s) for diagram number 164
-      helas_FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] );
+      helas_CD_FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] );
 
       // Amplitude(s) for diagram number 164
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 164 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3821,7 +3821,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 165
-      helas_FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 165 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3835,7 +3835,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 166
-      helas_FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 166 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3849,7 +3849,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 167
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 167 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3865,7 +3865,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 168
-      helas_FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 168 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3879,7 +3879,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 169
-      helas_FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 169 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3895,7 +3895,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 170
-      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 170 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3915,7 +3915,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 171
-      helas_FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 171 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3928,11 +3928,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 172 OF 1240 ***
 
       // Wavefunction(s) for diagram number 172
-      helas_VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      helas_VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_CD_VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] );
 
       // Amplitude(s) for diagram number 172
-      helas_VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 172 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3949,10 +3949,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 173 OF 1240 ***
 
       // Wavefunction(s) for diagram number 173
-      helas_VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] );
+      helas_CD_VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] );
 
       // Amplitude(s) for diagram number 173
-      helas_VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 173 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -3972,7 +3972,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 174
-      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3981,7 +3981,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -3990,7 +3990,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -4003,10 +4003,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 175 OF 1240 ***
 
       // Wavefunction(s) for diagram number 175
-      helas_FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] );
 
       // Amplitude(s) for diagram number 175
-      helas_FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 175 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4020,7 +4020,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 176
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 176 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4036,7 +4036,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 177
-      helas_FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 177 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4050,7 +4050,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 178
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 178 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4063,10 +4063,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 179 OF 1240 ***
 
       // Wavefunction(s) for diagram number 179
-      helas_FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
 
       // Amplitude(s) for diagram number 179
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 179 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4080,7 +4080,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 180
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 180 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4096,7 +4096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 181
-      helas_FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 181 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4110,7 +4110,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 182
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 182 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4124,7 +4124,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 183
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 183 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4140,7 +4140,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 184
-      helas_FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 184 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4154,7 +4154,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 185
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 185 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4170,7 +4170,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 186
-      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 186 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4190,7 +4190,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 187
-      helas_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 187 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4203,10 +4203,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 188 OF 1240 ***
 
       // Wavefunction(s) for diagram number 188
-      helas_FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
 
       // Amplitude(s) for diagram number 188
-      helas_FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 188 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4219,7 +4219,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 189
-      helas_FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 189 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4229,10 +4229,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 190 OF 1240 ***
 
       // Wavefunction(s) for diagram number 190
-      helas_FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] );
 
       // Amplitude(s) for diagram number 190
-      helas_FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 190 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4245,7 +4245,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 191
-      helas_FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 191 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4258,7 +4258,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 192
-      helas_FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 192 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4271,7 +4271,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 193
-      helas_FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 193 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4284,7 +4284,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 194
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 194 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4298,7 +4298,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 195
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 195 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4314,7 +4314,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 196
-      helas_FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 196 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4328,7 +4328,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 197
-      helas_FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 197 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4341,7 +4341,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 198
-      helas_FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 198 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4351,10 +4351,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 199 OF 1240 ***
 
       // Wavefunction(s) for diagram number 199
-      helas_FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
 
       // Amplitude(s) for diagram number 199
-      helas_FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 199 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4367,7 +4367,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 200
-      helas_FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 200 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4380,7 +4380,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 201
-      helas_FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 201 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4393,7 +4393,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 202
-      helas_FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 202 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4406,7 +4406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 203
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 203 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4420,7 +4420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 204
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 204 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4436,7 +4436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 205
-      helas_FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 205 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4450,7 +4450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 206
-      helas_FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 206 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4463,7 +4463,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 207
-      helas_FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 207 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4473,10 +4473,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 208 OF 1240 ***
 
       // Wavefunction(s) for diagram number 208
-      helas_FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 208
-      helas_FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 208 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4489,7 +4489,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 209
-      helas_FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 209 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4502,7 +4502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 210
-      helas_FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 210 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4515,7 +4515,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 211
-      helas_FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 211 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4528,7 +4528,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 212
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 212 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4542,7 +4542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 213
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 213 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4558,7 +4558,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 214
-      helas_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 214 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4572,7 +4572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 215
-      helas_FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 215 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4586,7 +4586,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 216
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 216 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4599,10 +4599,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 217 OF 1240 ***
 
       // Wavefunction(s) for diagram number 217
-      helas_VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] );
 
       // Amplitude(s) for diagram number 217
-      helas_VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 217 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4622,7 +4622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 218
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 218 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4642,7 +4642,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 219
-      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4651,7 +4651,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4660,7 +4660,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -4676,7 +4676,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 220
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 220 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4692,7 +4692,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 221
-      helas_FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 221 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4706,7 +4706,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 222
-      helas_FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 222 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4720,7 +4720,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 223
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 223 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4733,10 +4733,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 224 OF 1240 ***
 
       // Wavefunction(s) for diagram number 224
-      helas_VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 224
-      helas_VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 224 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4756,7 +4756,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 225
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 225 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4776,7 +4776,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 226
-      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4785,7 +4785,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4794,7 +4794,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4810,7 +4810,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 227
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 227 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4826,7 +4826,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 228
-      helas_FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 228 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4840,7 +4840,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 229
-      helas_FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 229 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4854,7 +4854,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 230
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 230 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4867,10 +4867,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 231 OF 1240 ***
 
       // Wavefunction(s) for diagram number 231
-      helas_VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] );
 
       // Amplitude(s) for diagram number 231
-      helas_VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 231 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4890,7 +4890,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 232
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 232 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4910,7 +4910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 233
-      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4919,7 +4919,7 @@ namespace mg5amcCpu
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4928,7 +4928,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -4944,7 +4944,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 234
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 234 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4960,7 +4960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 235
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 235 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -4971,12 +4971,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 236 OF 1240 ***
 
       // Wavefunction(s) for diagram number 236
-      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] );
-      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] );
-      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] );
+      helas_CD_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] );
+      helas_CD_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] );
+      helas_CD_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] );
 
       // Amplitude(s) for diagram number 236
-      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4985,7 +4985,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -4994,7 +4994,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5010,17 +5010,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 237
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -5032,17 +5032,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 238
-      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -5051,12 +5051,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 239 OF 1240 ***
 
       // Wavefunction(s) for diagram number 239
-      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] );
-      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] );
-      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] );
+      helas_CD_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] );
+      helas_CD_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] );
+      helas_CD_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] );
 
       // Amplitude(s) for diagram number 239
-      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5065,7 +5065,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5074,7 +5074,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5090,17 +5090,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 240
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -5112,17 +5112,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 241
-      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -5131,12 +5131,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 242 OF 1240 ***
 
       // Wavefunction(s) for diagram number 242
-      helas_VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] );
-      helas_VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] );
-      helas_VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] );
+      helas_CD_VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] );
+      helas_CD_VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] );
+      helas_CD_VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] );
 
       // Amplitude(s) for diagram number 242
-      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5145,7 +5145,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5154,7 +5154,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5170,17 +5170,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 243
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -5192,17 +5192,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 244
-      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -5214,17 +5214,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 245
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -5236,7 +5236,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 246
-      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5245,7 +5245,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5254,7 +5254,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5267,13 +5267,13 @@ namespace mg5amcCpu
       // *** DIAGRAM 247 OF 1240 ***
 
       // Wavefunction(s) for diagram number 247
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
-      helas_FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      helas_FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 247
-      helas_FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 247 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5283,10 +5283,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 248 OF 1240 ***
 
       // Wavefunction(s) for diagram number 248
-      helas_FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] );
 
       // Amplitude(s) for diagram number 248
-      helas_FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 248 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5296,11 +5296,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 249 OF 1240 ***
 
       // Wavefunction(s) for diagram number 249
-      helas_FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
-      helas_FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] );
 
       // Amplitude(s) for diagram number 249
-      helas_FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 249 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5313,7 +5313,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 250
-      helas_FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 250 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5323,10 +5323,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 251 OF 1240 ***
 
       // Wavefunction(s) for diagram number 251
-      helas_FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
 
       // Amplitude(s) for diagram number 251
-      helas_FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 251 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5339,7 +5339,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 252
-      helas_FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 252 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5349,10 +5349,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 253 OF 1240 ***
 
       // Wavefunction(s) for diagram number 253
-      helas_FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] );
+      helas_CD_FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] );
 
       // Amplitude(s) for diagram number 253
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 253 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5365,10 +5365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 254 OF 1240 ***
 
       // Wavefunction(s) for diagram number 254
-      helas_FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 254
-      helas_FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 254 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5382,7 +5382,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 255
-      helas_FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 255 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5396,7 +5396,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 256
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 256 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5409,10 +5409,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 257 OF 1240 ***
 
       // Wavefunction(s) for diagram number 257
-      helas_FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
 
       // Amplitude(s) for diagram number 257
-      helas_FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 257 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5426,7 +5426,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 258
-      helas_FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 258 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5440,7 +5440,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 259
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 259 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5456,7 +5456,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 260
-      helas_FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 260 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5467,10 +5467,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 261 OF 1240 ***
 
       // Wavefunction(s) for diagram number 261
-      helas_FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
 
       // Amplitude(s) for diagram number 261
-      helas_FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 261 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5484,17 +5484,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 262
-      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[33] += amp_sv[0];
       jamp_sv[35] -= amp_sv[0];
       jamp_sv[41] -= amp_sv[0];
       jamp_sv[47] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[35] -= amp_sv[0];
       jamp_sv[39] += amp_sv[0];
       jamp_sv[41] -= amp_sv[0];
       jamp_sv[45] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[39] += amp_sv[0];
       jamp_sv[45] += amp_sv[0];
@@ -5503,10 +5503,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 263 OF 1240 ***
 
       // Wavefunction(s) for diagram number 263
-      helas_FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] );
+      helas_CD_FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] );
 
       // Amplitude(s) for diagram number 263
-      helas_VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 263 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5526,7 +5526,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 264
-      helas_VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 264 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5546,7 +5546,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 265
-      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5555,7 +5555,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0];
@@ -5564,7 +5564,7 @@ namespace mg5amcCpu
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5577,10 +5577,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 266 OF 1240 ***
 
       // Wavefunction(s) for diagram number 266
-      helas_FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] );
 
       // Amplitude(s) for diagram number 266
-      helas_FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 266 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5594,7 +5594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 267
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 267 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5610,7 +5610,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 268
-      helas_FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 268 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5624,7 +5624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 269
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 269 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5637,10 +5637,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 270 OF 1240 ***
 
       // Wavefunction(s) for diagram number 270
-      helas_FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
 
       // Amplitude(s) for diagram number 270
-      helas_FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 270 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5651,10 +5651,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 271 OF 1240 ***
 
       // Wavefunction(s) for diagram number 271
-      helas_FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] );
+      helas_CD_FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] );
 
       // Amplitude(s) for diagram number 271
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 271 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5670,7 +5670,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 272
-      helas_FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 272 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5684,7 +5684,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 273
-      helas_FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 273 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5695,10 +5695,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 274 OF 1240 ***
 
       // Wavefunction(s) for diagram number 274
-      helas_FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] );
+      helas_CD_FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 274
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 274 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5714,7 +5714,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 275
-      helas_FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 275 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5728,7 +5728,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 276
-      helas_FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 276 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5744,7 +5744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 277
-      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 277 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5764,7 +5764,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 278
-      helas_FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 278 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5780,7 +5780,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 279
-      helas_VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 279 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5800,7 +5800,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 280
-      helas_VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 280 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5820,7 +5820,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 281
-      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5829,7 +5829,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5838,7 +5838,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -5851,10 +5851,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 282 OF 1240 ***
 
       // Wavefunction(s) for diagram number 282
-      helas_FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
 
       // Amplitude(s) for diagram number 282
-      helas_FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 282 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5868,7 +5868,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 283
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 283 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5884,7 +5884,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 284
-      helas_FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 284 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5898,7 +5898,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 285
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 285 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5911,10 +5911,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 286 OF 1240 ***
 
       // Wavefunction(s) for diagram number 286
-      helas_FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
 
       // Amplitude(s) for diagram number 286
-      helas_FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 286 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5925,10 +5925,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 287 OF 1240 ***
 
       // Wavefunction(s) for diagram number 287
-      helas_FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] );
+      helas_CD_FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] );
 
       // Amplitude(s) for diagram number 287
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 287 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5944,7 +5944,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 288
-      helas_FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 288 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5958,7 +5958,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 289
-      helas_FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 289 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5972,7 +5972,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 290
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 290 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -5988,7 +5988,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 291
-      helas_FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 291 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6002,7 +6002,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 292
-      helas_FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 292 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6018,7 +6018,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 293
-      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 293 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6038,7 +6038,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 294
-      helas_FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 294 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6054,7 +6054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 295
-      helas_VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 295 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6074,7 +6074,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 296
-      helas_VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 296 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6094,7 +6094,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 297
-      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -6103,7 +6103,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -6112,7 +6112,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -6125,10 +6125,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 298 OF 1240 ***
 
       // Wavefunction(s) for diagram number 298
-      helas_FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
 
       // Amplitude(s) for diagram number 298
-      helas_FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 298 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6142,7 +6142,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 299
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 299 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6158,7 +6158,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 300
-      helas_FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 300 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6172,7 +6172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 301
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 301 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6185,10 +6185,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 302 OF 1240 ***
 
       // Wavefunction(s) for diagram number 302
-      helas_FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 302
-      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 302 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6202,7 +6202,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 303
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 303 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6218,7 +6218,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 304
-      helas_FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 304 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6232,7 +6232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 305
-      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 305 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6246,7 +6246,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 306
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 306 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6262,7 +6262,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 307
-      helas_FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 307 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6276,7 +6276,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 308
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 308 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6292,7 +6292,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 309
-      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 309 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6312,7 +6312,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 310
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 310 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6325,10 +6325,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 311 OF 1240 ***
 
       // Wavefunction(s) for diagram number 311
-      helas_FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 311
-      helas_FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 311 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6341,7 +6341,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 312
-      helas_FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 312 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6351,10 +6351,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 313 OF 1240 ***
 
       // Wavefunction(s) for diagram number 313
-      helas_FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] );
 
       // Amplitude(s) for diagram number 313
-      helas_FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 313 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6367,7 +6367,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 314
-      helas_FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 314 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6380,7 +6380,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 315
-      helas_FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 315 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6393,7 +6393,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 316
-      helas_FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 316 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6406,7 +6406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 317
-      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 317 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6420,7 +6420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 318
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 318 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6436,7 +6436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 319
-      helas_FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 319 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6450,7 +6450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 320
-      helas_FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 320 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6463,7 +6463,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 321
-      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 321 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6473,10 +6473,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 322 OF 1240 ***
 
       // Wavefunction(s) for diagram number 322
-      helas_FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
 
       // Amplitude(s) for diagram number 322
-      helas_FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 322 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6489,7 +6489,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 323
-      helas_FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 323 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6502,7 +6502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 324
-      helas_FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 324 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6515,7 +6515,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 325
-      helas_FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 325 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6528,7 +6528,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 326
-      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 326 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6542,7 +6542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 327
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 327 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6558,7 +6558,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 328
-      helas_FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 328 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6572,7 +6572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 329
-      helas_FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 329 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6585,7 +6585,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 330
-      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 330 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6595,10 +6595,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 331 OF 1240 ***
 
       // Wavefunction(s) for diagram number 331
-      helas_FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
 
       // Amplitude(s) for diagram number 331
-      helas_FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 331 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6611,7 +6611,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 332
-      helas_FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 332 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6624,7 +6624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 333
-      helas_FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 333 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6637,7 +6637,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 334
-      helas_FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 334 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6650,7 +6650,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 335
-      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 335 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6664,7 +6664,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 336
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 336 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6680,7 +6680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 337
-      helas_FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 337 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6694,7 +6694,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 338
-      helas_FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 338 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6708,7 +6708,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 339
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 339 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6724,7 +6724,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 340
-      helas_VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 340 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6744,7 +6744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 341
-      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 341 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6764,7 +6764,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 342
-      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -6773,7 +6773,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -6782,7 +6782,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -6798,7 +6798,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 343
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 343 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6814,7 +6814,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 344
-      helas_FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 344 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6828,7 +6828,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 345
-      helas_FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 345 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6842,7 +6842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 346
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 346 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6858,7 +6858,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 347
-      helas_VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 347 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6878,7 +6878,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 348
-      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 348 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6898,7 +6898,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 349
-      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -6907,7 +6907,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -6916,7 +6916,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
@@ -6932,7 +6932,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 350
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 350 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6948,7 +6948,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 351
-      helas_FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 351 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6962,7 +6962,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 352
-      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 352 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6976,7 +6976,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 353
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 353 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -6992,7 +6992,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 354
-      helas_VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 354 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7012,7 +7012,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 355
-      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 355 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7032,7 +7032,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 356
-      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7041,7 +7041,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7050,7 +7050,7 @@ namespace mg5amcCpu
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7066,7 +7066,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 357
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 357 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7082,7 +7082,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 358
-      helas_FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 358 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7096,7 +7096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 359
-      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7105,7 +7105,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7114,7 +7114,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7130,17 +7130,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 360
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[33] += amp_sv[0];
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[87] += amp_sv[0];
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[57] += amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[81] += amp_sv[0];
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[57] += amp_sv[0];
       jamp_sv[81] += amp_sv[0];
@@ -7152,17 +7152,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 361
-      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
@@ -7174,7 +7174,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 362
-      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7183,7 +7183,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7192,7 +7192,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7208,17 +7208,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 363
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[35] += amp_sv[0];
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[59] += amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[35] -= amp_sv[0];
       jamp_sv[59] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
@@ -7230,17 +7230,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 364
-      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[81] += amp_sv[0];
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[87] += amp_sv[0];
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[81] -= amp_sv[0];
       jamp_sv[87] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
@@ -7252,7 +7252,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 365
-      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7261,7 +7261,7 @@ namespace mg5amcCpu
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7270,7 +7270,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7286,17 +7286,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 366
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[41] += amp_sv[0];
       jamp_sv[47] -= amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[47] -= amp_sv[0];
       jamp_sv[83] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[41] -= amp_sv[0];
       jamp_sv[83] += amp_sv[0];
       jamp_sv[107] += amp_sv[0];
@@ -7308,17 +7308,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 367
-      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[57] += amp_sv[0];
       jamp_sv[59] -= amp_sv[0];
       jamp_sv[65] -= amp_sv[0];
       jamp_sv[71] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[59] -= amp_sv[0];
       jamp_sv[63] += amp_sv[0];
       jamp_sv[65] -= amp_sv[0];
       jamp_sv[69] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[57] -= amp_sv[0];
       jamp_sv[63] += amp_sv[0];
       jamp_sv[69] += amp_sv[0];
@@ -7330,17 +7330,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 368
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[65] += amp_sv[0];
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[65] -= amp_sv[0];
       jamp_sv[89] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
@@ -7352,7 +7352,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 369
-      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7361,7 +7361,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7370,7 +7370,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7383,11 +7383,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 370 OF 1240 ***
 
       // Wavefunction(s) for diagram number 370
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 370
-      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 370 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7401,7 +7401,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 371
-      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 371 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7412,11 +7412,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 372 OF 1240 ***
 
       // Wavefunction(s) for diagram number 372
-      helas_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] );
-      helas_FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] );
 
       // Amplitude(s) for diagram number 372
-      helas_VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 372 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7436,7 +7436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 373
-      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 373 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7449,10 +7449,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 374 OF 1240 ***
 
       // Wavefunction(s) for diagram number 374
-      helas_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 374
-      helas_VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 374 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7472,7 +7472,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 375
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 375 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7485,12 +7485,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 376 OF 1240 ***
 
       // Wavefunction(s) for diagram number 376
-      helas_VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 376
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7499,7 +7499,7 @@ namespace mg5amcCpu
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7508,7 +7508,7 @@ namespace mg5amcCpu
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7521,10 +7521,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 377 OF 1240 ***
 
       // Wavefunction(s) for diagram number 377
-      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] );
 
       // Amplitude(s) for diagram number 377
-      helas_FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 377 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7535,10 +7535,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 378 OF 1240 ***
 
       // Wavefunction(s) for diagram number 378
-      helas_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 378
-      helas_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 378 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7552,7 +7552,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 379
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 379 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7568,7 +7568,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 380
-      helas_FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 380 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7579,10 +7579,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 381 OF 1240 ***
 
       // Wavefunction(s) for diagram number 381
-      helas_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] );
 
       // Amplitude(s) for diagram number 381
-      helas_FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 381 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7596,7 +7596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 382
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 382 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7612,7 +7612,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 383
-      helas_FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 383 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7628,7 +7628,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 384
-      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 384 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7641,10 +7641,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 385 OF 1240 ***
 
       // Wavefunction(s) for diagram number 385
-      helas_VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] );
 
       // Amplitude(s) for diagram number 385
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 385 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7661,10 +7661,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 386 OF 1240 ***
 
       // Wavefunction(s) for diagram number 386
-      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 386
-      helas_FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 386 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7678,7 +7678,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 387
-      helas_FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 387 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7689,10 +7689,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 388 OF 1240 ***
 
       // Wavefunction(s) for diagram number 388
-      helas_FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] );
+      helas_CD_FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] );
 
       // Amplitude(s) for diagram number 388
-      helas_VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 388 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7712,7 +7712,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 389
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 389 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7728,7 +7728,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 390
-      helas_VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 390 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7748,7 +7748,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 391
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 391 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7764,7 +7764,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 392
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
@@ -7773,7 +7773,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7782,7 +7782,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -7795,10 +7795,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 393 OF 1240 ***
 
       // Wavefunction(s) for diagram number 393
-      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 393
-      helas_FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 393 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7809,10 +7809,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 394 OF 1240 ***
 
       // Wavefunction(s) for diagram number 394
-      helas_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] );
 
       // Amplitude(s) for diagram number 394
-      helas_FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 394 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7826,7 +7826,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 395
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 395 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7842,7 +7842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 396
-      helas_FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 396 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7853,10 +7853,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 397 OF 1240 ***
 
       // Wavefunction(s) for diagram number 397
-      helas_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 397
-      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 397 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7870,7 +7870,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 398
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 398 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7886,7 +7886,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 399
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 399 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7902,7 +7902,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 400
-      helas_FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 400 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7918,7 +7918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 401
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 401 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7938,7 +7938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 402
-      helas_FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 402 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7954,7 +7954,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 403
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 403 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7974,7 +7974,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 404
-      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 404 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -7990,7 +7990,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 405
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 405 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8010,7 +8010,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 406
-      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 406 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8030,7 +8030,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 407
-      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 407 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8050,7 +8050,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 408
-      helas_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -8067,7 +8067,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -8084,7 +8084,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -8105,10 +8105,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 409 OF 1240 ***
 
       // Wavefunction(s) for diagram number 409
-      helas_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 409
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 409 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8133,10 +8133,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 410 OF 1240 ***
 
       // Wavefunction(s) for diagram number 410
-      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 410
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 410 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8164,7 +8164,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 411
-      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 411 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8192,7 +8192,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 412
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 412 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8212,7 +8212,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 413
-      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 413 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8228,7 +8228,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 414
-      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 414 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8244,7 +8244,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 415
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 415 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8264,7 +8264,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 416
-      helas_FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 416 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8280,7 +8280,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 417
-      helas_FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 417 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8296,7 +8296,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 418
-      helas_FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 418 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8312,7 +8312,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 419
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 419 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8332,7 +8332,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 420
-      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 420 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8348,7 +8348,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 421
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 421 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8368,7 +8368,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 422
-      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 422 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8388,7 +8388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 423
-      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 423 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8408,7 +8408,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 424
-      helas_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -8425,7 +8425,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[7] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -8442,7 +8442,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -8463,10 +8463,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 425 OF 1240 ***
 
       // Wavefunction(s) for diagram number 425
-      helas_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 425
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 425 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8494,7 +8494,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 426
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 426 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8522,7 +8522,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 427
-      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 427 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8550,7 +8550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 428
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 428 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8570,7 +8570,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 429
-      helas_FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 429 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8586,7 +8586,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 430
-      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 430 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8602,7 +8602,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 431
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 431 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8622,7 +8622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 432
-      helas_FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 432 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8638,7 +8638,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 433
-      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 433 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8651,10 +8651,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 434 OF 1240 ***
 
       // Wavefunction(s) for diagram number 434
-      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 434
-      helas_VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 434 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8682,7 +8682,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 435
-      helas_VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 435 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8710,7 +8710,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 436
-      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -8727,7 +8727,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -8744,7 +8744,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -8765,10 +8765,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 437 OF 1240 ***
 
       // Wavefunction(s) for diagram number 437
-      helas_VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] );
 
       // Amplitude(s) for diagram number 437
-      helas_VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 437 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8796,7 +8796,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 438
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 438 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8824,7 +8824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 439
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -8841,7 +8841,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[115] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -8858,7 +8858,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -8882,7 +8882,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 440
-      helas_VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 440 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8910,7 +8910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 441
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 441 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -8938,7 +8938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 442
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -8955,7 +8955,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -8972,7 +8972,7 @@ namespace mg5amcCpu
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -8993,12 +8993,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 443 OF 1240 ***
 
       // Wavefunction(s) for diagram number 443
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 443
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -9015,7 +9015,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -9032,7 +9032,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -9053,12 +9053,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 444 OF 1240 ***
 
       // Wavefunction(s) for diagram number 444
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] );
 
       // Amplitude(s) for diagram number 444
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -9075,7 +9075,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -9092,7 +9092,7 @@ namespace mg5amcCpu
       jamp_sv[94] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -9116,7 +9116,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 445
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -9133,7 +9133,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -9150,7 +9150,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -9174,7 +9174,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 446
-      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -9191,7 +9191,7 @@ namespace mg5amcCpu
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
@@ -9208,7 +9208,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -9232,7 +9232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 447
-      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 447 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9260,7 +9260,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 448
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 448 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9288,7 +9288,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 449
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 449 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9316,7 +9316,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 450
-      helas_VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 450 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9336,7 +9336,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 451
-      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 451 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9352,7 +9352,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 452
-      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 452 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9366,7 +9366,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 453
-      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 453 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9380,7 +9380,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 454
-      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 454 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9396,7 +9396,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 455
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 455 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9416,7 +9416,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 456
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9425,7 +9425,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9434,7 +9434,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9450,7 +9450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 457
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 457 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9466,7 +9466,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 458
-      helas_FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 458 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9480,7 +9480,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 459
-      helas_FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 459 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9494,7 +9494,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 460
-      helas_VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 460 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9514,7 +9514,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 461
-      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 461 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9530,7 +9530,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 462
-      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 462 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9544,7 +9544,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 463
-      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 463 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9558,7 +9558,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 464
-      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 464 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9574,7 +9574,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 465
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 465 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9594,7 +9594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 466
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9603,7 +9603,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9612,7 +9612,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9628,7 +9628,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 467
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 467 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9644,7 +9644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 468
-      helas_FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 468 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9658,7 +9658,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 469
-      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 469 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9672,7 +9672,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 470
-      helas_VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 470 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9692,7 +9692,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 471
-      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 471 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9708,7 +9708,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 472
-      helas_FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 472 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9722,7 +9722,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 473
-      helas_FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 473 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9736,7 +9736,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 474
-      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 474 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9752,7 +9752,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 475
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 475 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9772,7 +9772,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 476
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9781,7 +9781,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9790,7 +9790,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9806,7 +9806,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 477
-      helas_VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 477 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9826,7 +9826,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 478
-      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 478 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9842,7 +9842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 479
-      helas_FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 479 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9856,7 +9856,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 480
-      helas_FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 480 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9870,7 +9870,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 481
-      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 481 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9886,7 +9886,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 482
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 482 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9906,7 +9906,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 483
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9915,7 +9915,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9924,7 +9924,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -9940,7 +9940,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 484
-      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 484 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9960,7 +9960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 485
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 485 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -9980,7 +9980,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 486
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 486 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10000,7 +10000,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 487
-      helas_FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 487 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10016,7 +10016,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 488
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 488 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10036,7 +10036,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 489
-      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 489 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10052,7 +10052,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 490
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10061,7 +10061,7 @@ namespace mg5amcCpu
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10070,7 +10070,7 @@ namespace mg5amcCpu
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -10086,7 +10086,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 491
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10095,7 +10095,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -10104,7 +10104,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -10120,7 +10120,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 492
-      helas_VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -10137,7 +10137,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -10154,7 +10154,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -10175,11 +10175,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 493 OF 1240 ***
 
       // Wavefunction(s) for diagram number 493
-      helas_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 493
-      helas_FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 493 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10193,7 +10193,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 494
-      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 494 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10204,10 +10204,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 495 OF 1240 ***
 
       // Wavefunction(s) for diagram number 495
-      helas_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] );
 
       // Amplitude(s) for diagram number 495
-      helas_VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 495 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10227,7 +10227,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 496
-      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 496 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10240,10 +10240,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 497 OF 1240 ***
 
       // Wavefunction(s) for diagram number 497
-      helas_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 497
-      helas_VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 497 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10263,7 +10263,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 498
-      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 498 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10276,12 +10276,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 499 OF 1240 ***
 
       // Wavefunction(s) for diagram number 499
-      helas_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] );
 
       // Amplitude(s) for diagram number 499
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10290,7 +10290,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10299,7 +10299,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10312,10 +10312,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 500 OF 1240 ***
 
       // Wavefunction(s) for diagram number 500
-      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
 
       // Amplitude(s) for diagram number 500
-      helas_FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 500 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10326,10 +10326,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 501 OF 1240 ***
 
       // Wavefunction(s) for diagram number 501
-      helas_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 501
-      helas_FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 501 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10343,7 +10343,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 502
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 502 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10359,7 +10359,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 503
-      helas_FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 503 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10370,10 +10370,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 504 OF 1240 ***
 
       // Wavefunction(s) for diagram number 504
-      helas_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
 
       // Amplitude(s) for diagram number 504
-      helas_FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 504 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10387,7 +10387,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 505
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 505 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10403,7 +10403,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 506
-      helas_FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 506 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10419,7 +10419,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 507
-      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 507 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10432,10 +10432,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 508 OF 1240 ***
 
       // Wavefunction(s) for diagram number 508
-      helas_VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] );
 
       // Amplitude(s) for diagram number 508
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 508 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10452,10 +10452,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 509 OF 1240 ***
 
       // Wavefunction(s) for diagram number 509
-      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] );
 
       // Amplitude(s) for diagram number 509
-      helas_FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 509 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10469,7 +10469,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 510
-      helas_FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 510 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10483,7 +10483,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 511
-      helas_VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 511 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10503,7 +10503,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 512
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 512 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10519,7 +10519,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 513
-      helas_VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 513 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10539,7 +10539,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 514
-      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 514 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10555,7 +10555,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 515
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10564,7 +10564,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10573,7 +10573,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -10586,10 +10586,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 516 OF 1240 ***
 
       // Wavefunction(s) for diagram number 516
-      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
 
       // Amplitude(s) for diagram number 516
-      helas_FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 516 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10600,10 +10600,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 517 OF 1240 ***
 
       // Wavefunction(s) for diagram number 517
-      helas_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 517
-      helas_FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 517 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10617,7 +10617,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 518
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 518 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10633,7 +10633,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 519
-      helas_FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 519 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10644,10 +10644,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 520 OF 1240 ***
 
       // Wavefunction(s) for diagram number 520
-      helas_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 520
-      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 520 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10661,7 +10661,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 521
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 521 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10677,7 +10677,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 522
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 522 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10693,7 +10693,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 523
-      helas_FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 523 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10709,7 +10709,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 524
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 524 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10729,7 +10729,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 525
-      helas_FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 525 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10745,7 +10745,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 526
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 526 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10765,7 +10765,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 527
-      helas_FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 527 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10781,7 +10781,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 528
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 528 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10801,7 +10801,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 529
-      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 529 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10821,7 +10821,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 530
-      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 530 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10841,7 +10841,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 531
-      helas_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -10858,7 +10858,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -10875,7 +10875,7 @@ namespace mg5amcCpu
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -10896,10 +10896,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 532 OF 1240 ***
 
       // Wavefunction(s) for diagram number 532
-      helas_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 532
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 532 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10924,10 +10924,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 533 OF 1240 ***
 
       // Wavefunction(s) for diagram number 533
-      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 533
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 533 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10955,7 +10955,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 534
-      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 534 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -10983,7 +10983,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 535
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 535 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11003,7 +11003,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 536
-      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 536 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11019,7 +11019,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 537
-      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 537 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11035,7 +11035,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 538
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 538 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11055,7 +11055,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 539
-      helas_FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 539 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11071,7 +11071,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 540
-      helas_FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 540 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11087,7 +11087,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 541
-      helas_FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 541 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11103,7 +11103,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 542
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 542 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11123,7 +11123,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 543
-      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 543 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11139,7 +11139,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 544
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 544 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11159,7 +11159,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 545
-      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 545 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11179,7 +11179,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 546
-      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 546 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11199,7 +11199,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 547
-      helas_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -11216,7 +11216,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[13] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -11233,7 +11233,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -11254,10 +11254,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 548 OF 1240 ***
 
       // Wavefunction(s) for diagram number 548
-      helas_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 548
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 548 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11285,7 +11285,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 549
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 549 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11313,7 +11313,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 550
-      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 550 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11341,7 +11341,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 551
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 551 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11361,7 +11361,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 552
-      helas_FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 552 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11377,7 +11377,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 553
-      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 553 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11393,7 +11393,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 554
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 554 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11413,7 +11413,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 555
-      helas_FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 555 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11429,7 +11429,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 556
-      helas_FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 556 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11442,10 +11442,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 557 OF 1240 ***
 
       // Wavefunction(s) for diagram number 557
-      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 557
-      helas_VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 557 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11473,7 +11473,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 558
-      helas_VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 558 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11501,7 +11501,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 559
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -11518,7 +11518,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -11535,7 +11535,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -11559,7 +11559,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 560
-      helas_VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 560 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11587,7 +11587,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 561
-      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 561 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11615,7 +11615,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 562
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[15] += amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -11632,7 +11632,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -11649,7 +11649,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -11673,7 +11673,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 563
-      helas_VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 563 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11701,7 +11701,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 564
-      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 564 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -11729,7 +11729,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 565
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -11746,7 +11746,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -11763,7 +11763,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -11784,12 +11784,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 566 OF 1240 ***
 
       // Wavefunction(s) for diagram number 566
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 566
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -11806,7 +11806,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -11823,7 +11823,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -11844,12 +11844,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 567 OF 1240 ***
 
       // Wavefunction(s) for diagram number 567
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
 
       // Amplitude(s) for diagram number 567
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -11866,7 +11866,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -11883,7 +11883,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -11907,7 +11907,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 568
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -11924,7 +11924,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -11941,7 +11941,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[15] += amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -11965,7 +11965,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 569
-      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
@@ -11982,7 +11982,7 @@ namespace mg5amcCpu
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
@@ -11999,7 +11999,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -12023,7 +12023,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 570
-      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 570 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12051,7 +12051,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 571
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 571 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12079,7 +12079,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 572
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 572 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12107,7 +12107,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 573
-      helas_VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 573 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12127,7 +12127,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 574
-      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 574 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12143,7 +12143,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 575
-      helas_FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 575 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12157,7 +12157,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 576
-      helas_FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 576 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12171,7 +12171,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 577
-      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 577 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12187,7 +12187,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 578
-      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 578 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12207,7 +12207,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 579
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12216,7 +12216,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12225,7 +12225,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12241,7 +12241,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 580
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 580 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12257,7 +12257,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 581
-      helas_FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 581 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12271,7 +12271,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 582
-      helas_FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 582 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12285,7 +12285,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 583
-      helas_VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 583 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12305,7 +12305,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 584
-      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 584 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12321,7 +12321,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 585
-      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 585 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12335,7 +12335,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 586
-      helas_FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 586 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12349,7 +12349,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 587
-      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 587 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12365,7 +12365,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 588
-      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 588 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12385,7 +12385,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 589
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12394,7 +12394,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12403,7 +12403,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12419,7 +12419,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 590
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 590 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12435,7 +12435,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 591
-      helas_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 591 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12449,7 +12449,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 592
-      helas_FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 592 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12463,7 +12463,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 593
-      helas_VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 593 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12483,7 +12483,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 594
-      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 594 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12499,7 +12499,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 595
-      helas_FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 595 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12513,7 +12513,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 596
-      helas_FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 596 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12527,7 +12527,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 597
-      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 597 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12543,7 +12543,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 598
-      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 598 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12563,7 +12563,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 599
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12572,7 +12572,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12581,7 +12581,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12597,7 +12597,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 600
-      helas_VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 600 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12617,7 +12617,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 601
-      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 601 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12633,7 +12633,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 602
-      helas_FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 602 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12647,7 +12647,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 603
-      helas_FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 603 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12661,7 +12661,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 604
-      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 604 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12677,7 +12677,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 605
-      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 605 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12697,7 +12697,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 606
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12706,7 +12706,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12715,7 +12715,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12731,7 +12731,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 607
-      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 607 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12751,7 +12751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 608
-      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 608 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12771,7 +12771,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 609
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 609 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12791,7 +12791,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 610
-      helas_FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 610 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12807,7 +12807,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 611
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 611 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12827,7 +12827,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 612
-      helas_FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 612 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12843,7 +12843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 613
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12852,7 +12852,7 @@ namespace mg5amcCpu
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12861,7 +12861,7 @@ namespace mg5amcCpu
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -12877,7 +12877,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 614
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
@@ -12886,7 +12886,7 @@ namespace mg5amcCpu
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -12895,7 +12895,7 @@ namespace mg5amcCpu
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -12911,7 +12911,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 615
-      helas_VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -12928,7 +12928,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -12945,7 +12945,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -12966,11 +12966,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 616 OF 1240 ***
 
       // Wavefunction(s) for diagram number 616
-      helas_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 616
-      helas_FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 616 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12984,7 +12984,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 617
-      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 617 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -12995,10 +12995,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 618 OF 1240 ***
 
       // Wavefunction(s) for diagram number 618
-      helas_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] );
 
       // Amplitude(s) for diagram number 618
-      helas_VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 618 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13018,7 +13018,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 619
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 619 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13031,10 +13031,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 620 OF 1240 ***
 
       // Wavefunction(s) for diagram number 620
-      helas_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 620
-      helas_VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 620 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13054,7 +13054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 621
-      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 621 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13067,12 +13067,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 622 OF 1240 ***
 
       // Wavefunction(s) for diagram number 622
-      helas_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] );
 
       // Amplitude(s) for diagram number 622
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13081,7 +13081,7 @@ namespace mg5amcCpu
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13090,7 +13090,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13103,10 +13103,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 623 OF 1240 ***
 
       // Wavefunction(s) for diagram number 623
-      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 623
-      helas_FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 623 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13117,10 +13117,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 624 OF 1240 ***
 
       // Wavefunction(s) for diagram number 624
-      helas_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
 
       // Amplitude(s) for diagram number 624
-      helas_FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 624 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13134,7 +13134,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 625
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 625 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13150,7 +13150,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 626
-      helas_FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 626 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13161,10 +13161,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 627 OF 1240 ***
 
       // Wavefunction(s) for diagram number 627
-      helas_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 627
-      helas_FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 627 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13178,7 +13178,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 628
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 628 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13194,7 +13194,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 629
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 629 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13210,7 +13210,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 630
-      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 630 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13223,10 +13223,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 631 OF 1240 ***
 
       // Wavefunction(s) for diagram number 631
-      helas_VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] );
 
       // Amplitude(s) for diagram number 631
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 631 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13243,10 +13243,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 632 OF 1240 ***
 
       // Wavefunction(s) for diagram number 632
-      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] );
 
       // Amplitude(s) for diagram number 632
-      helas_FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 632 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13260,7 +13260,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 633
-      helas_FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 633 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13274,7 +13274,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 634
-      helas_VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 634 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13294,7 +13294,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 635
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 635 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13310,7 +13310,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 636
-      helas_VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 636 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13330,7 +13330,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 637
-      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 637 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13346,7 +13346,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 638
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13355,7 +13355,7 @@ namespace mg5amcCpu
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13364,7 +13364,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
@@ -13377,10 +13377,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 639 OF 1240 ***
 
       // Wavefunction(s) for diagram number 639
-      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 639
-      helas_FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 639 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13391,10 +13391,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 640 OF 1240 ***
 
       // Wavefunction(s) for diagram number 640
-      helas_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 640
-      helas_FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 640 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13408,7 +13408,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 641
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 641 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13424,7 +13424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 642
-      helas_FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 642 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13435,10 +13435,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 643 OF 1240 ***
 
       // Wavefunction(s) for diagram number 643
-      helas_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 643
-      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 643 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13452,7 +13452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 644
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 644 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13468,7 +13468,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 645
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 645 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13484,7 +13484,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 646
-      helas_FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 646 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13500,7 +13500,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 647
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 647 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13520,7 +13520,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 648
-      helas_FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 648 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13536,7 +13536,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 649
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 649 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13556,7 +13556,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 650
-      helas_FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 650 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13572,7 +13572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 651
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 651 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13592,7 +13592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 652
-      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 652 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13612,7 +13612,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 653
-      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 653 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13632,7 +13632,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 654
-      helas_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -13649,7 +13649,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
@@ -13666,7 +13666,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -13687,10 +13687,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 655 OF 1240 ***
 
       // Wavefunction(s) for diagram number 655
-      helas_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 655
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 655 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13715,10 +13715,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 656 OF 1240 ***
 
       // Wavefunction(s) for diagram number 656
-      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] );
 
       // Amplitude(s) for diagram number 656
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 656 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13746,7 +13746,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 657
-      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 657 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13774,7 +13774,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 658
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 658 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13794,7 +13794,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 659
-      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 659 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13810,7 +13810,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 660
-      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 660 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13826,7 +13826,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 661
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 661 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13846,7 +13846,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 662
-      helas_FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 662 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13862,7 +13862,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 663
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 663 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13878,7 +13878,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 664
-      helas_FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 664 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13894,7 +13894,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 665
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 665 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13914,7 +13914,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 666
-      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 666 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13930,7 +13930,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 667
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 667 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13950,7 +13950,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 668
-      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 668 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13970,7 +13970,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 669
-      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 669 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -13990,7 +13990,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 670
-      helas_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[19] += amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -14007,7 +14007,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[19] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -14024,7 +14024,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -14045,10 +14045,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 671 OF 1240 ***
 
       // Wavefunction(s) for diagram number 671
-      helas_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 671
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 671 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14076,7 +14076,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 672
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 672 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14104,7 +14104,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 673
-      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 673 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14132,7 +14132,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 674
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 674 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14152,7 +14152,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 675
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 675 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14168,7 +14168,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 676
-      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 676 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14184,7 +14184,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 677
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 677 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14204,7 +14204,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 678
-      helas_FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 678 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14220,7 +14220,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 679
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 679 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14233,10 +14233,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 680 OF 1240 ***
 
       // Wavefunction(s) for diagram number 680
-      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 680
-      helas_VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 680 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14264,7 +14264,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 681
-      helas_VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 681 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14292,7 +14292,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 682
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -14309,7 +14309,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -14326,7 +14326,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -14350,7 +14350,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 683
-      helas_VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 683 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14378,7 +14378,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 684
-      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 684 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14406,7 +14406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 685
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[21] += amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -14423,7 +14423,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -14440,7 +14440,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -14464,7 +14464,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 686
-      helas_VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 686 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14492,7 +14492,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 687
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 687 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14520,7 +14520,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 688
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[23] += amp_sv[0];
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -14537,7 +14537,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -14554,7 +14554,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -14575,12 +14575,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 689 OF 1240 ***
 
       // Wavefunction(s) for diagram number 689
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 689
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -14597,7 +14597,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -14614,7 +14614,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -14635,12 +14635,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 690 OF 1240 ***
 
       // Wavefunction(s) for diagram number 690
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 690
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] += amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -14657,7 +14657,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -14674,7 +14674,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -14698,7 +14698,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 691
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -14715,7 +14715,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[23] += amp_sv[0];
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -14732,7 +14732,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[21] += amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -14756,7 +14756,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 692
-      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
@@ -14773,7 +14773,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
@@ -14790,7 +14790,7 @@ namespace mg5amcCpu
       jamp_sv[97] += amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -14814,7 +14814,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 693
-      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 693 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14842,7 +14842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 694
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 694 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14870,7 +14870,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 695
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 695 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14898,7 +14898,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 696
-      helas_VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 696 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14918,7 +14918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 697
-      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 697 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14934,7 +14934,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 698
-      helas_FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 698 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14948,7 +14948,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 699
-      helas_FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 699 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14962,7 +14962,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 700
-      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 700 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14978,7 +14978,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 701
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 701 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -14998,7 +14998,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 702
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15007,7 +15007,7 @@ namespace mg5amcCpu
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15016,7 +15016,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15032,7 +15032,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 703
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 703 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15048,7 +15048,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 704
-      helas_FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 704 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15062,7 +15062,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 705
-      helas_FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 705 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15076,7 +15076,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 706
-      helas_VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 706 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15096,7 +15096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 707
-      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 707 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15112,7 +15112,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 708
-      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 708 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15126,7 +15126,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 709
-      helas_FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 709 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15140,7 +15140,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 710
-      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 710 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15156,7 +15156,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 711
-      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 711 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15176,7 +15176,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 712
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15185,7 +15185,7 @@ namespace mg5amcCpu
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15194,7 +15194,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15210,7 +15210,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 713
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 713 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15226,7 +15226,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 714
-      helas_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 714 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15240,7 +15240,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 715
-      helas_FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 715 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15254,7 +15254,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 716
-      helas_VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 716 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15274,7 +15274,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 717
-      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 717 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15290,7 +15290,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 718
-      helas_FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 718 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15304,7 +15304,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 719
-      helas_FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 719 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15318,7 +15318,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 720
-      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 720 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15334,7 +15334,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 721
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 721 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15354,7 +15354,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 722
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15363,7 +15363,7 @@ namespace mg5amcCpu
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15372,7 +15372,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15388,7 +15388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 723
-      helas_VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 723 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15408,7 +15408,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 724
-      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 724 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15424,7 +15424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 725
-      helas_FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 725 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15438,7 +15438,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 726
-      helas_FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 726 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15452,7 +15452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 727
-      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 727 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15468,7 +15468,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 728
-      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 728 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15488,7 +15488,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 729
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15497,7 +15497,7 @@ namespace mg5amcCpu
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15506,7 +15506,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15522,7 +15522,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 730
-      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 730 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15542,7 +15542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 731
-      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 731 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15562,7 +15562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 732
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 732 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15582,7 +15582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 733
-      helas_FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 733 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15598,7 +15598,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 734
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 734 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15618,7 +15618,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 735
-      helas_FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 735 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15634,7 +15634,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 736
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15643,7 +15643,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15652,7 +15652,7 @@ namespace mg5amcCpu
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -15668,7 +15668,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 737
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0];
@@ -15677,7 +15677,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -15686,7 +15686,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -15702,7 +15702,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 738
-      helas_VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -15719,7 +15719,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -15736,7 +15736,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
@@ -15757,10 +15757,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 739 OF 1240 ***
 
       // Wavefunction(s) for diagram number 739
-      helas_FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] );
 
       // Amplitude(s) for diagram number 739
-      helas_FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 739 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15773,7 +15773,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 740
-      helas_FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 740 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15783,10 +15783,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 741 OF 1240 ***
 
       // Wavefunction(s) for diagram number 741
-      helas_FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 741
-      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 741 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15799,7 +15799,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 742
-      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 742 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15812,7 +15812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 743
-      helas_FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 743 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15825,7 +15825,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 744
-      helas_FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 744 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15838,7 +15838,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 745
-      helas_FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 745 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15852,7 +15852,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 746
-      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 746 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15863,10 +15863,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 747 OF 1240 ***
 
       // Wavefunction(s) for diagram number 747
-      helas_VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 747
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 747 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15882,7 +15882,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 748
-      helas_FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 748 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15895,7 +15895,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 749
-      helas_FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 749 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15905,10 +15905,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 750 OF 1240 ***
 
       // Wavefunction(s) for diagram number 750
-      helas_FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 750
-      helas_FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 750 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15921,7 +15921,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 751
-      helas_FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 751 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15934,7 +15934,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 752
-      helas_FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 752 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15947,7 +15947,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 753
-      helas_FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 753 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15960,7 +15960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 754
-      helas_FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 754 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15974,7 +15974,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 755
-      helas_FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 755 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -15985,10 +15985,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 756 OF 1240 ***
 
       // Wavefunction(s) for diagram number 756
-      helas_VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 756
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 756 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16004,7 +16004,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 757
-      helas_FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 757 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16017,7 +16017,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 758
-      helas_FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 758 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16027,10 +16027,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 759 OF 1240 ***
 
       // Wavefunction(s) for diagram number 759
-      helas_FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
 
       // Amplitude(s) for diagram number 759
-      helas_FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 759 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16043,7 +16043,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 760
-      helas_FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 760 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16056,7 +16056,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 761
-      helas_FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 761 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16069,7 +16069,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 762
-      helas_FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 762 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16082,7 +16082,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 763
-      helas_FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 763 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16096,7 +16096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 764
-      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 764 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16107,10 +16107,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 765 OF 1240 ***
 
       // Wavefunction(s) for diagram number 765
-      helas_VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] );
 
       // Amplitude(s) for diagram number 765
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 765 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16126,7 +16126,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 766
-      helas_FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 766 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16140,7 +16140,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 767
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 767 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16156,7 +16156,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 768
-      helas_VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 768 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16176,7 +16176,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 769
-      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 769 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16192,7 +16192,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 770
-      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 770 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16212,7 +16212,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 771
-      helas_FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 771 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16223,12 +16223,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 772 OF 1240 ***
 
       // Wavefunction(s) for diagram number 772
-      helas_VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 772
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
@@ -16237,7 +16237,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16246,7 +16246,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16262,7 +16262,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 773
-      helas_FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 773 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16276,7 +16276,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 774
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 774 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16292,7 +16292,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 775
-      helas_VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 775 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16312,7 +16312,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 776
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 776 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16328,7 +16328,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 777
-      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 777 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16348,7 +16348,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 778
-      helas_FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 778 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16359,12 +16359,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 779 OF 1240 ***
 
       // Wavefunction(s) for diagram number 779
-      helas_VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
 
       // Amplitude(s) for diagram number 779
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16373,7 +16373,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16382,7 +16382,7 @@ namespace mg5amcCpu
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16398,7 +16398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 780
-      helas_FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 780 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16412,7 +16412,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 781
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 781 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16428,7 +16428,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 782
-      helas_VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 782 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16448,7 +16448,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 783
-      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 783 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16464,7 +16464,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 784
-      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 784 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16484,7 +16484,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 785
-      helas_FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 785 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16495,12 +16495,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 786 OF 1240 ***
 
       // Wavefunction(s) for diagram number 786
-      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 786
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
@@ -16509,7 +16509,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16518,7 +16518,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16534,17 +16534,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 787
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -16553,12 +16553,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 788 OF 1240 ***
 
       // Wavefunction(s) for diagram number 788
-      helas_VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      helas_VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] );
-      helas_VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] );
 
       // Amplitude(s) for diagram number 788
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
@@ -16567,7 +16567,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
@@ -16576,7 +16576,7 @@ namespace mg5amcCpu
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -16589,10 +16589,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 789 OF 1240 ***
 
       // Wavefunction(s) for diagram number 789
-      helas_FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 789
-      helas_FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 789 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16605,7 +16605,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 790
-      helas_FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 790 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16615,10 +16615,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 791 OF 1240 ***
 
       // Wavefunction(s) for diagram number 791
-      helas_FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 791
-      helas_FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 791 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16631,7 +16631,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 792
-      helas_FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 792 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16644,7 +16644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 793
-      helas_FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 793 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16657,7 +16657,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 794
-      helas_FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 794 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16670,7 +16670,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 795
-      helas_FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 795 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16684,7 +16684,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 796
-      helas_FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 796 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16698,7 +16698,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 797
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 797 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16714,7 +16714,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 798
-      helas_FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 798 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16727,7 +16727,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 799
-      helas_FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 799 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16737,10 +16737,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 800 OF 1240 ***
 
       // Wavefunction(s) for diagram number 800
-      helas_FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 800
-      helas_FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 800 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16753,7 +16753,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 801
-      helas_FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 801 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16766,7 +16766,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 802
-      helas_FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 802 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16779,7 +16779,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 803
-      helas_FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 803 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16792,7 +16792,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 804
-      helas_FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 804 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16806,7 +16806,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 805
-      helas_FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 805 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16820,7 +16820,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 806
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 806 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16836,7 +16836,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 807
-      helas_FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 807 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16849,7 +16849,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 808
-      helas_FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 808 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16859,10 +16859,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 809 OF 1240 ***
 
       // Wavefunction(s) for diagram number 809
-      helas_FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
 
       // Amplitude(s) for diagram number 809
-      helas_FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 809 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16875,7 +16875,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 810
-      helas_FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 810 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16888,7 +16888,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 811
-      helas_FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 811 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16901,7 +16901,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 812
-      helas_FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 812 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16914,7 +16914,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 813
-      helas_FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 813 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16928,7 +16928,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 814
-      helas_FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 814 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16942,7 +16942,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 815
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 815 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16958,7 +16958,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 816
-      helas_FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 816 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16972,7 +16972,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 817
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 817 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -16988,7 +16988,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 818
-      helas_VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 818 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17008,7 +17008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 819
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 819 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17024,7 +17024,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 820
-      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 820 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17044,7 +17044,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 821
-      helas_FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 821 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17058,7 +17058,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 822
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
@@ -17067,7 +17067,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17076,7 +17076,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17092,7 +17092,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 823
-      helas_FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 823 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17106,7 +17106,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 824
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 824 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17122,7 +17122,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 825
-      helas_VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 825 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17142,7 +17142,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 826
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 826 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17158,7 +17158,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 827
-      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 827 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17178,7 +17178,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 828
-      helas_FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 828 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17192,7 +17192,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 829
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17201,7 +17201,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17210,7 +17210,7 @@ namespace mg5amcCpu
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17226,7 +17226,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 830
-      helas_FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 830 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17240,7 +17240,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 831
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 831 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17256,7 +17256,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 832
-      helas_VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 832 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17276,7 +17276,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 833
-      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 833 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17292,7 +17292,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 834
-      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 834 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17312,7 +17312,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 835
-      helas_FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 835 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17326,7 +17326,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 836
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
@@ -17335,7 +17335,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17344,7 +17344,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17360,17 +17360,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 837
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[64] += amp_sv[0];
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[64] -= amp_sv[0];
       jamp_sv[88] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
@@ -17382,7 +17382,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 838
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
@@ -17391,7 +17391,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
@@ -17400,7 +17400,7 @@ namespace mg5amcCpu
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -17413,10 +17413,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 839 OF 1240 ***
 
       // Wavefunction(s) for diagram number 839
-      helas_VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] );
 
       // Amplitude(s) for diagram number 839
-      helas_VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 839 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17444,7 +17444,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 840
-      helas_VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 840 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17472,7 +17472,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 841
-      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -17489,7 +17489,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -17506,7 +17506,7 @@ namespace mg5amcCpu
       jamp_sv[115] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[6] += amp_sv[0];
@@ -17527,10 +17527,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 842 OF 1240 ***
 
       // Wavefunction(s) for diagram number 842
-      helas_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] );
 
       // Amplitude(s) for diagram number 842
-      helas_VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 842 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17558,7 +17558,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 843
-      helas_VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 843 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17586,7 +17586,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 844
-      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -17603,7 +17603,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -17620,7 +17620,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[6] += amp_sv[0];
@@ -17644,7 +17644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 845
-      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 845 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17672,7 +17672,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 846
-      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 846 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -17697,12 +17697,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 847 OF 1240 ***
 
       // Wavefunction(s) for diagram number 847
-      helas_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 847
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -17719,7 +17719,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -17736,7 +17736,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -17757,12 +17757,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 848 OF 1240 ***
 
       // Wavefunction(s) for diagram number 848
-      helas_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 848
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
@@ -17779,7 +17779,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -17796,7 +17796,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[98] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -17817,12 +17817,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 849 OF 1240 ***
 
       // Wavefunction(s) for diagram number 849
-      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] );
 
       // Amplitude(s) for diagram number 849
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -17839,7 +17839,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -17856,7 +17856,7 @@ namespace mg5amcCpu
       jamp_sv[105] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -17877,12 +17877,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 850 OF 1240 ***
 
       // Wavefunction(s) for diagram number 850
-      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] );
 
       // Amplitude(s) for diagram number 850
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -17899,7 +17899,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -17916,7 +17916,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -17940,7 +17940,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 851
-      helas_VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -17957,7 +17957,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -17974,7 +17974,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -17998,7 +17998,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 852
-      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 852 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18026,7 +18026,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 853
-      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 853 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18054,7 +18054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 854
-      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 854 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18082,7 +18082,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 855
-      helas_VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 855 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18102,7 +18102,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 856
-      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 856 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18118,7 +18118,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 857
-      helas_FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 857 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18132,7 +18132,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 858
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 858 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18148,7 +18148,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 859
-      helas_FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 859 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18162,7 +18162,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 860
-      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 860 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18182,7 +18182,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 861
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
@@ -18191,7 +18191,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18200,7 +18200,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18216,7 +18216,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 862
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 862 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18232,7 +18232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 863
-      helas_FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 863 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18246,7 +18246,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 864
-      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 864 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18260,7 +18260,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 865
-      helas_VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 865 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18280,7 +18280,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 866
-      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 866 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18296,7 +18296,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 867
-      helas_FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 867 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18310,7 +18310,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 868
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 868 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18326,7 +18326,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 869
-      helas_FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 869 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18340,7 +18340,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 870
-      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 870 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18360,7 +18360,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 871
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
@@ -18369,7 +18369,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18378,7 +18378,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18394,7 +18394,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 872
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 872 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18410,7 +18410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 873
-      helas_FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 873 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18424,7 +18424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 874
-      helas_FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 874 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18438,7 +18438,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 875
-      helas_VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 875 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18458,7 +18458,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 876
-      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 876 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18474,7 +18474,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 877
-      helas_FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 877 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18488,7 +18488,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 878
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 878 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18504,7 +18504,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 879
-      helas_FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 879 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18518,7 +18518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 880
-      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 880 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18538,7 +18538,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 881
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
@@ -18547,7 +18547,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18556,7 +18556,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18572,7 +18572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 882
-      helas_VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 882 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18592,7 +18592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 883
-      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 883 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18608,7 +18608,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 884
-      helas_FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 884 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18622,7 +18622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 885
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 885 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18638,7 +18638,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 886
-      helas_FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 886 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18652,7 +18652,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 887
-      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 887 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18672,7 +18672,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 888
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -18681,7 +18681,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18690,7 +18690,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -18706,7 +18706,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 889
-      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 889 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18726,7 +18726,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 890
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 890 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18746,7 +18746,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 891
-      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 891 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18766,7 +18766,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 892
-      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 892 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18786,7 +18786,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 893
-      helas_FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 893 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18802,7 +18802,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 894
-      helas_FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 894 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18815,10 +18815,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 895 OF 1240 ***
 
       // Wavefunction(s) for diagram number 895
-      helas_VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] );
 
       // Amplitude(s) for diagram number 895
-      helas_VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 895 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18846,7 +18846,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 896
-      helas_VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 896 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18874,7 +18874,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 897
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -18891,7 +18891,7 @@ namespace mg5amcCpu
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -18908,7 +18908,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -18932,7 +18932,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 898
-      helas_VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 898 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18960,7 +18960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 899
-      helas_VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 899 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -18988,7 +18988,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 900
-      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -19005,7 +19005,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -19022,7 +19022,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[107] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -19046,7 +19046,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 901
-      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 901 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19074,7 +19074,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 902
-      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 902 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19099,12 +19099,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 903 OF 1240 ***
 
       // Wavefunction(s) for diagram number 903
-      helas_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 903
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -19121,7 +19121,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -19138,7 +19138,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -19159,12 +19159,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 904 OF 1240 ***
 
       // Wavefunction(s) for diagram number 904
-      helas_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] );
 
       // Amplitude(s) for diagram number 904
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
@@ -19181,7 +19181,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -19198,7 +19198,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -19219,12 +19219,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 905 OF 1240 ***
 
       // Wavefunction(s) for diagram number 905
-      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
 
       // Amplitude(s) for diagram number 905
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -19241,7 +19241,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -19258,7 +19258,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -19282,7 +19282,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 906
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -19299,7 +19299,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
@@ -19316,7 +19316,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -19340,7 +19340,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 907
-      helas_VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -19357,7 +19357,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -19374,7 +19374,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[20] += amp_sv[0];
@@ -19398,7 +19398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 908
-      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 908 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19426,7 +19426,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 909
-      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 909 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19454,7 +19454,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 910
-      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 910 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19482,7 +19482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 911
-      helas_VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 911 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19502,7 +19502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 912
-      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 912 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19518,7 +19518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 913
-      helas_FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 913 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19532,7 +19532,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 914
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 914 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19548,7 +19548,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 915
-      helas_FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 915 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19562,7 +19562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 916
-      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 916 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19582,7 +19582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 917
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
@@ -19591,7 +19591,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19600,7 +19600,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19616,7 +19616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 918
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 918 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19632,7 +19632,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 919
-      helas_FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 919 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19646,7 +19646,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 920
-      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 920 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19660,7 +19660,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 921
-      helas_VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 921 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19680,7 +19680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 922
-      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 922 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19696,7 +19696,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 923
-      helas_FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 923 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19710,7 +19710,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 924
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 924 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19726,7 +19726,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 925
-      helas_FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 925 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19740,7 +19740,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 926
-      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 926 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19760,7 +19760,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 927
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19769,7 +19769,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19778,7 +19778,7 @@ namespace mg5amcCpu
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19794,7 +19794,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 928
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 928 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19810,7 +19810,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 929
-      helas_FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 929 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19824,7 +19824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 930
-      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 930 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19838,7 +19838,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 931
-      helas_VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 931 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19858,7 +19858,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 932
-      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 932 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19874,7 +19874,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 933
-      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 933 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19888,7 +19888,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 934
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 934 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19904,7 +19904,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 935
-      helas_FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 935 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19918,7 +19918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 936
-      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 936 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19938,7 +19938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 937
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
@@ -19947,7 +19947,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19956,7 +19956,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -19972,7 +19972,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 938
-      helas_VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 938 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -19992,7 +19992,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 939
-      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 939 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20008,7 +20008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 940
-      helas_FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 940 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20022,7 +20022,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 941
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 941 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20038,7 +20038,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 942
-      helas_FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 942 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20052,7 +20052,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 943
-      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 943 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20072,7 +20072,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 944
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -20081,7 +20081,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -20090,7 +20090,7 @@ namespace mg5amcCpu
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -20106,7 +20106,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 945
-      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 945 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20126,7 +20126,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 946
-      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 946 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20146,7 +20146,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 947
-      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 947 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20166,7 +20166,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 948
-      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 948 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20186,7 +20186,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 949
-      helas_FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 949 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20202,7 +20202,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 950
-      helas_FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 950 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20215,10 +20215,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 951 OF 1240 ***
 
       // Wavefunction(s) for diagram number 951
-      helas_VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 951
-      helas_VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 951 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20246,7 +20246,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 952
-      helas_VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 952 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20274,7 +20274,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 953
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -20291,7 +20291,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -20308,7 +20308,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -20332,7 +20332,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 954
-      helas_VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 954 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20360,7 +20360,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 955
-      helas_VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 955 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20388,7 +20388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 956
-      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -20405,7 +20405,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -20422,7 +20422,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -20446,7 +20446,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 957
-      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 957 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20474,7 +20474,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 958
-      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 958 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20499,12 +20499,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 959 OF 1240 ***
 
       // Wavefunction(s) for diagram number 959
-      helas_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 959
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -20521,7 +20521,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -20538,7 +20538,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -20559,12 +20559,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 960 OF 1240 ***
 
       // Wavefunction(s) for diagram number 960
-      helas_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] );
 
       // Amplitude(s) for diagram number 960
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -20581,7 +20581,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -20598,7 +20598,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -20622,7 +20622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 961
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -20639,7 +20639,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
@@ -20656,7 +20656,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -20680,7 +20680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 962
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -20697,7 +20697,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
@@ -20714,7 +20714,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -20738,7 +20738,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 963
-      helas_VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
@@ -20755,7 +20755,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -20772,7 +20772,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[14] += amp_sv[0];
@@ -20796,7 +20796,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 964
-      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 964 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20824,7 +20824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 965
-      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 965 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20852,7 +20852,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 966
-      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 966 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20880,7 +20880,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 967
-      helas_VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 967 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20900,7 +20900,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 968
-      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 968 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20916,7 +20916,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 969
-      helas_FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 969 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20930,7 +20930,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 970
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 970 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20946,7 +20946,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 971
-      helas_FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 971 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20960,7 +20960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 972
-      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 972 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -20980,7 +20980,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 973
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -20989,7 +20989,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -20998,7 +20998,7 @@ namespace mg5amcCpu
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21014,7 +21014,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 974
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 974 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21030,7 +21030,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 975
-      helas_FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 975 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21044,7 +21044,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 976
-      helas_FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 976 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21058,7 +21058,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 977
-      helas_VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 977 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21078,7 +21078,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 978
-      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 978 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21094,7 +21094,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 979
-      helas_FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 979 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21108,7 +21108,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 980
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 980 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21124,7 +21124,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 981
-      helas_FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 981 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21138,7 +21138,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 982
-      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 982 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21158,7 +21158,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 983
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21167,7 +21167,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21176,7 +21176,7 @@ namespace mg5amcCpu
       jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21192,7 +21192,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 984
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 984 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21208,7 +21208,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 985
-      helas_FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 985 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21222,7 +21222,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 986
-      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 986 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21236,7 +21236,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 987
-      helas_VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 987 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21256,7 +21256,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 988
-      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 988 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21272,7 +21272,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 989
-      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 989 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21286,7 +21286,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 990
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 990 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21302,7 +21302,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 991
-      helas_FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 991 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21316,7 +21316,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 992
-      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 992 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21336,7 +21336,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 993
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21345,7 +21345,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21354,7 +21354,7 @@ namespace mg5amcCpu
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21370,7 +21370,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 994
-      helas_VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 994 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21390,7 +21390,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 995
-      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 995 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21406,7 +21406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 996
-      helas_FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 996 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21420,7 +21420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 997
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 997 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21436,7 +21436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 998
-      helas_FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 998 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21450,7 +21450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 999
-      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 999 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21470,7 +21470,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1000
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21479,7 +21479,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21488,7 +21488,7 @@ namespace mg5amcCpu
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -21504,7 +21504,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1001
-      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1001 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21524,7 +21524,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1002
-      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1002 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21544,7 +21544,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1003
-      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1003 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21564,7 +21564,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1004
-      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1004 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21584,7 +21584,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1005
-      helas_FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1005 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21600,7 +21600,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1006
-      helas_FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1006 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21616,7 +21616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1007
-      helas_VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1007 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21644,7 +21644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1008
-      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1008 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21672,7 +21672,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1009
-      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -21689,7 +21689,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -21706,7 +21706,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -21730,7 +21730,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1010
-      helas_VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1010 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21758,7 +21758,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1011
-      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1011 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21786,7 +21786,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1012
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -21803,7 +21803,7 @@ namespace mg5amcCpu
       jamp_sv[101] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[14] += amp_sv[0];
@@ -21820,7 +21820,7 @@ namespace mg5amcCpu
       jamp_sv[103] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -21844,7 +21844,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1013
-      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1013 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21872,7 +21872,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1014
-      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1014 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -21897,12 +21897,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1015 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1015
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] );
 
       // Amplitude(s) for diagram number 1015
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
@@ -21919,7 +21919,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -21936,7 +21936,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -21957,12 +21957,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1016 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1016
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 1016
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -21979,7 +21979,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -21996,7 +21996,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -22020,7 +22020,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1017
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[8] += amp_sv[0];
@@ -22037,7 +22037,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -22054,7 +22054,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -22078,7 +22078,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1018
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -22095,7 +22095,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[21] += amp_sv[0];
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
@@ -22112,7 +22112,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -22136,7 +22136,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1019
-      helas_VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1019 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22164,7 +22164,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1020
-      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1020 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22192,7 +22192,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1021
-      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -22209,7 +22209,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -22226,7 +22226,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -22250,7 +22250,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1022
-      helas_VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1022 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22278,7 +22278,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1023
-      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1023 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22306,7 +22306,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1024
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -22323,7 +22323,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[20] += amp_sv[0];
@@ -22340,7 +22340,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[25] += amp_sv[0];
@@ -22364,7 +22364,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1025
-      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1025 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22392,7 +22392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1026
-      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1026 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22420,7 +22420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1027
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -22437,7 +22437,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -22454,7 +22454,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -22475,12 +22475,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1028 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1028
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1028
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -22497,7 +22497,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[25] += amp_sv[0];
@@ -22514,7 +22514,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
       jamp_sv[25] += amp_sv[0];
@@ -22538,7 +22538,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1029
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -22555,7 +22555,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -22572,7 +22572,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -22596,7 +22596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1030
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
@@ -22613,7 +22613,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
@@ -22630,7 +22630,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -22654,7 +22654,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1031
-      helas_VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1031 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22682,7 +22682,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1032
-      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1032 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22710,7 +22710,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1033
-      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -22727,7 +22727,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -22744,7 +22744,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[3] += amp_sv[0];
@@ -22768,7 +22768,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1034
-      helas_VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1034 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22796,7 +22796,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1035
-      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1035 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22824,7 +22824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1036
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -22841,7 +22841,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -22858,7 +22858,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
       jamp_sv[27] += amp_sv[0];
@@ -22882,7 +22882,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1037
-      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1037 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22910,7 +22910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1038
-      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1038 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -22938,7 +22938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1039
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
@@ -22955,7 +22955,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[3] += amp_sv[0];
@@ -22972,7 +22972,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[3] += amp_sv[0];
@@ -22993,12 +22993,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1040 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1040
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 1040
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -23015,7 +23015,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
       jamp_sv[27] += amp_sv[0];
@@ -23032,7 +23032,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[90] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[27] += amp_sv[0];
@@ -23056,7 +23056,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1041
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -23073,7 +23073,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -23090,7 +23090,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -23114,7 +23114,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1042
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -23131,7 +23131,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -23148,7 +23148,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -23172,7 +23172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1043
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23189,7 +23189,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23206,7 +23206,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -23223,7 +23223,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23240,7 +23240,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23257,7 +23257,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -23274,7 +23274,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -23291,7 +23291,7 @@ namespace mg5amcCpu
       jamp_sv[113] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -23308,7 +23308,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
@@ -23332,7 +23332,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1044
-      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23349,7 +23349,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
@@ -23366,7 +23366,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -23390,7 +23390,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1045
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -23407,7 +23407,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
@@ -23424,7 +23424,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
@@ -23448,7 +23448,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1046
-      helas_FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1046 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23461,7 +23461,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1047
-      helas_FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1047 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23474,7 +23474,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1048
-      helas_FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1048 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23487,7 +23487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1049
-      helas_FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1049 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23500,7 +23500,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1050
-      helas_FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1050 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23513,7 +23513,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1051
-      helas_FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1051 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23526,7 +23526,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1052
-      helas_FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1052 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23539,7 +23539,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1053
-      helas_FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1053 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23552,7 +23552,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1054
-      helas_FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1054 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23565,7 +23565,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1055
-      helas_FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1055 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23578,7 +23578,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1056
-      helas_FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1056 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23591,7 +23591,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1057
-      helas_FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1057 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23604,7 +23604,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1058
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1058 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23620,7 +23620,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1059
-      helas_FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1059 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23634,7 +23634,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1060
-      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1060 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23650,7 +23650,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1061
-      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1061 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23670,7 +23670,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1062
-      helas_FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1062 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23684,7 +23684,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1063
-      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1063 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23704,7 +23704,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1064
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
@@ -23713,7 +23713,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -23722,7 +23722,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -23738,7 +23738,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1065
-      helas_FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1065 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23751,7 +23751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1066
-      helas_FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1066 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23764,7 +23764,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1067
-      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1067 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23777,7 +23777,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1068
-      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1068 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23790,7 +23790,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1069
-      helas_FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1069 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23803,7 +23803,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1070
-      helas_FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1070 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23816,7 +23816,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1071
-      helas_FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1071 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23829,7 +23829,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1072
-      helas_FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1072 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23842,7 +23842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1073
-      helas_FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1073 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23855,7 +23855,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1074
-      helas_FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1074 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23868,7 +23868,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1075
-      helas_FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1075 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23881,7 +23881,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1076
-      helas_FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1076 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23894,7 +23894,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1077
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1077 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23910,7 +23910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1078
-      helas_FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1078 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23924,7 +23924,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1079
-      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1079 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23940,7 +23940,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1080
-      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1080 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23960,7 +23960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1081
-      helas_FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1081 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23974,7 +23974,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1082
-      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1082 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -23994,7 +23994,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1083
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
@@ -24003,7 +24003,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24012,7 +24012,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24028,7 +24028,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1084
-      helas_FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1084 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24041,7 +24041,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1085
-      helas_FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1085 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24054,7 +24054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1086
-      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1086 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24067,7 +24067,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1087
-      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1087 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24080,7 +24080,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1088
-      helas_FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1088 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24093,7 +24093,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1089
-      helas_FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1089 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24106,7 +24106,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1090
-      helas_FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1090 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24119,7 +24119,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1091
-      helas_FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1091 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24132,7 +24132,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1092
-      helas_FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1092 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24145,7 +24145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1093
-      helas_FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1093 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24158,7 +24158,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1094
-      helas_FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1094 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24171,7 +24171,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1095
-      helas_FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1095 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24184,7 +24184,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1096
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1096 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24200,7 +24200,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1097
-      helas_FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1097 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24214,7 +24214,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1098
-      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1098 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24230,7 +24230,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1099
-      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1099 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24250,7 +24250,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1100
-      helas_FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1100 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24264,7 +24264,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1101
-      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1101 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24284,7 +24284,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1102
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
@@ -24293,7 +24293,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24302,7 +24302,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24318,7 +24318,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1103
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1103 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24334,7 +24334,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1104
-      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1104 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24348,7 +24348,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1105
-      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1105 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24364,7 +24364,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1106
-      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1106 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24384,7 +24384,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1107
-      helas_FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1107 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24398,7 +24398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1108
-      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1108 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24418,7 +24418,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1109
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
@@ -24427,7 +24427,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24436,7 +24436,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24452,7 +24452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1110
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1110 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24468,7 +24468,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1111
-      helas_FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1111 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24482,7 +24482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1112
-      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1112 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24498,7 +24498,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1113
-      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1113 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24518,7 +24518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1114
-      helas_FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1114 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24532,7 +24532,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1115
-      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1115 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24552,7 +24552,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1116
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -24561,7 +24561,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24570,7 +24570,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24586,7 +24586,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1117
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1117 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24602,7 +24602,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1118
-      helas_FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1118 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24616,7 +24616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1119
-      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1119 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24632,7 +24632,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1120
-      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1120 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24652,7 +24652,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1121
-      helas_FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1121 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24666,7 +24666,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1122
-      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1122 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -24686,7 +24686,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1123
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -24695,7 +24695,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24704,7 +24704,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -24717,12 +24717,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1124 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1124
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] );
 
       // Amplitude(s) for diagram number 1124
-      helas_VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -24739,7 +24739,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -24756,7 +24756,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -24773,7 +24773,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -24790,7 +24790,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -24807,7 +24807,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -24824,7 +24824,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -24841,7 +24841,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -24858,7 +24858,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -24879,12 +24879,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1125 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1125
-      helas_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] );
-      helas_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
-      helas_VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_CD_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_CD_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_CD_VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] );
 
       // Amplitude(s) for diagram number 1125
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -24901,7 +24901,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -24918,7 +24918,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -24939,12 +24939,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1126 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1126
-      helas_VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] );
-      helas_VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] );
-      helas_VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_CD_VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1126
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -24961,7 +24961,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -24978,7 +24978,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -25002,7 +25002,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1127
-      helas_VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -25019,7 +25019,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -25036,7 +25036,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[1] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -25057,22 +25057,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1128 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1128
-      helas_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
-      helas_FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
 
       // Amplitude(s) for diagram number 1128
-      helas_FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[90] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
-      helas_FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
-      helas_FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[94] += amp_sv[0];
@@ -25084,7 +25084,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1129
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25093,7 +25093,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25102,7 +25102,7 @@ namespace mg5amcCpu
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25118,17 +25118,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1130
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += amp_sv[0];
       jamp_sv[74] -= amp_sv[0];
       jamp_sv[80] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[74] -= amp_sv[0];
       jamp_sv[78] += amp_sv[0];
       jamp_sv[80] -= amp_sv[0];
       jamp_sv[84] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= amp_sv[0];
       jamp_sv[78] += amp_sv[0];
       jamp_sv[84] += amp_sv[0];
@@ -25140,17 +25140,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1131
-      helas_FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
@@ -25162,7 +25162,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1132
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25171,7 +25171,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25180,7 +25180,7 @@ namespace mg5amcCpu
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25196,17 +25196,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1133
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += amp_sv[0];
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
@@ -25215,22 +25215,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1134 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1134
-      helas_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
-      helas_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
-      helas_FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
 
       // Amplitude(s) for diagram number 1134
-      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[55] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[49] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -25242,7 +25242,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1135
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25251,7 +25251,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25260,7 +25260,7 @@ namespace mg5amcCpu
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25276,17 +25276,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1136
-      helas_FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
       jamp_sv[54] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
       jamp_sv[48] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -25298,7 +25298,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1137
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25307,7 +25307,7 @@ namespace mg5amcCpu
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25316,7 +25316,7 @@ namespace mg5amcCpu
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25332,7 +25332,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1138
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25341,7 +25341,7 @@ namespace mg5amcCpu
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25350,7 +25350,7 @@ namespace mg5amcCpu
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25366,7 +25366,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1139
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25375,7 +25375,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25384,7 +25384,7 @@ namespace mg5amcCpu
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25397,12 +25397,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1140 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1140
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 1140
-      helas_VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -25419,7 +25419,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -25436,7 +25436,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -25453,7 +25453,7 @@ namespace mg5amcCpu
       jamp_sv[100] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -25470,7 +25470,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -25487,7 +25487,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -25504,7 +25504,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -25521,7 +25521,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -25538,7 +25538,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -25559,12 +25559,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1141 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1141
-      helas_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] );
-      helas_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] );
-      helas_VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 1141
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -25581,7 +25581,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -25598,7 +25598,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -25619,12 +25619,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1142 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1142
-      helas_VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      helas_VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      helas_VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_CD_VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_CD_VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 1142
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -25641,7 +25641,7 @@ namespace mg5amcCpu
       jamp_sv[100] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -25658,7 +25658,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[48] += amp_sv[0];
@@ -25682,7 +25682,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1143
-      helas_VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
@@ -25699,7 +25699,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -25716,7 +25716,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[3] += amp_sv[0];
       jamp_sv[26] += amp_sv[0];
@@ -25737,22 +25737,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1144 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1144
-      helas_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
-      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] );
-      helas_FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 1144
-      helas_FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[66] += amp_sv[0];
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[71] += amp_sv[0];
-      helas_FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[68] += amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[70] += amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[66] -= amp_sv[0];
       jamp_sv[68] += amp_sv[0];
       jamp_sv[70] += amp_sv[0];
@@ -25764,7 +25764,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1145
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25773,7 +25773,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25782,7 +25782,7 @@ namespace mg5amcCpu
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25798,17 +25798,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1146
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += amp_sv[0];
       jamp_sv[50] -= amp_sv[0];
       jamp_sv[56] -= amp_sv[0];
       jamp_sv[62] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[50] -= amp_sv[0];
       jamp_sv[54] += amp_sv[0];
       jamp_sv[56] -= amp_sv[0];
       jamp_sv[60] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= amp_sv[0];
       jamp_sv[54] += amp_sv[0];
       jamp_sv[60] += amp_sv[0];
@@ -25820,17 +25820,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1147
-      helas_FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[108] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
@@ -25842,7 +25842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1148
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25851,7 +25851,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25860,7 +25860,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25876,17 +25876,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1149
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] += amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
@@ -25895,22 +25895,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1150 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1150
-      helas_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
-      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
-      helas_FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
 
       // Amplitude(s) for diagram number 1150
-      helas_FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[79] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[73] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[73] += amp_sv[0];
@@ -25922,7 +25922,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1151
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25931,7 +25931,7 @@ namespace mg5amcCpu
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25940,7 +25940,7 @@ namespace mg5amcCpu
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -25956,17 +25956,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1152
-      helas_FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += amp_sv[0];
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
       jamp_sv[78] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
       jamp_sv[72] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[72] += amp_sv[0];
@@ -25978,7 +25978,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1153
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25987,7 +25987,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0];
@@ -25996,7 +25996,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26012,7 +26012,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1154
-      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26021,7 +26021,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26030,7 +26030,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26046,7 +26046,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1155
-      helas_FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26055,7 +26055,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26064,7 +26064,7 @@ namespace mg5amcCpu
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26077,12 +26077,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1156 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1156
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1156
-      helas_VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
@@ -26099,7 +26099,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -26116,7 +26116,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      helas_VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -26133,7 +26133,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      helas_VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -26150,7 +26150,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -26167,7 +26167,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      helas_VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -26184,7 +26184,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      helas_VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -26201,7 +26201,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      helas_VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -26218,7 +26218,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -26239,12 +26239,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1157 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1157
-      helas_VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] );
-      helas_VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] );
-      helas_VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_CD_VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_CD_VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 1157
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -26261,7 +26261,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -26278,7 +26278,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -26299,12 +26299,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1158 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1158
-      helas_VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] );
-      helas_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] );
-      helas_VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 1158
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -26321,7 +26321,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -26338,7 +26338,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[49] += amp_sv[0];
@@ -26362,7 +26362,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1159
-      helas_VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
@@ -26379,7 +26379,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -26396,7 +26396,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
-      helas_VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
       jamp_sv[28] += amp_sv[0];
@@ -26417,22 +26417,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1160 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1160
-      helas_FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
-      helas_FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 1160
-      helas_FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[60] += amp_sv[0];
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[65] += amp_sv[0];
-      helas_FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[62] += amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[64] += amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[60] -= amp_sv[0];
       jamp_sv[62] += amp_sv[0];
       jamp_sv[64] += amp_sv[0];
@@ -26444,7 +26444,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1161
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26453,7 +26453,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26462,7 +26462,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26478,17 +26478,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1162
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] += amp_sv[0];
       jamp_sv[52] -= amp_sv[0];
       jamp_sv[58] -= amp_sv[0];
       jamp_sv[68] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[52] -= amp_sv[0];
       jamp_sv[55] += amp_sv[0];
       jamp_sv[58] -= amp_sv[0];
       jamp_sv[66] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] -= amp_sv[0];
       jamp_sv[55] += amp_sv[0];
       jamp_sv[66] += amp_sv[0];
@@ -26500,17 +26500,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1163
-      helas_FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[84] += amp_sv[0];
       jamp_sv[85] -= amp_sv[0];
       jamp_sv[87] -= amp_sv[0];
       jamp_sv[89] += amp_sv[0];
-      helas_FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[85] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
       jamp_sv[87] -= amp_sv[0];
       jamp_sv[88] += amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[84] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
       jamp_sv[88] += amp_sv[0];
@@ -26522,7 +26522,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1164
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26531,7 +26531,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26540,7 +26540,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26556,17 +26556,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1165
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] += amp_sv[0];
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[79] += amp_sv[0];
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[90] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] -= amp_sv[0];
       jamp_sv[79] += amp_sv[0];
       jamp_sv[90] += amp_sv[0];
@@ -26575,22 +26575,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1166 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1166
-      helas_FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
-      helas_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
-      helas_FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
 
       // Amplitude(s) for diagram number 1166
-      helas_FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
@@ -26602,7 +26602,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1167
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26611,7 +26611,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26620,7 +26620,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26636,17 +26636,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1168
-      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += amp_sv[0];
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
@@ -26658,7 +26658,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1169
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26667,7 +26667,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26676,7 +26676,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26692,7 +26692,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1170
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26701,7 +26701,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26710,7 +26710,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26726,7 +26726,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1171
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26735,7 +26735,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26744,7 +26744,7 @@ namespace mg5amcCpu
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26757,25 +26757,25 @@ namespace mg5amcCpu
       // *** DIAGRAM 1172 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1172
-      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] );
-      helas_FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
-      helas_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 1172
-      helas_FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[42] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[47] += amp_sv[0];
-      helas_FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[44] += amp_sv[0];
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[46] += amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[42] -= amp_sv[0];
       jamp_sv[44] += amp_sv[0];
       jamp_sv[46] += amp_sv[0];
@@ -26784,12 +26784,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1173 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1173
-      helas_VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      helas_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] );
-      helas_VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_CD_VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
 
       // Amplitude(s) for diagram number 1173
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26798,7 +26798,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26807,7 +26807,7 @@ namespace mg5amcCpu
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26823,17 +26823,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1174
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] += amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[32] -= amp_sv[0];
       jamp_sv[38] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
       jamp_sv[32] -= amp_sv[0];
       jamp_sv[36] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -26842,22 +26842,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1175 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1175
-      helas_FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
-      helas_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
-      helas_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 1175
-      helas_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[85] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[51] += amp_sv[0];
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[75] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[51] += amp_sv[0];
       jamp_sv[75] += amp_sv[0];
@@ -26869,7 +26869,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1176
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26878,7 +26878,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26887,7 +26887,7 @@ namespace mg5amcCpu
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26903,17 +26903,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1177
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
       jamp_sv[115] += amp_sv[0];
@@ -26925,7 +26925,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1178
-      helas_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26934,7 +26934,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26943,7 +26943,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26959,7 +26959,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1179
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26968,7 +26968,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
@@ -26977,7 +26977,7 @@ namespace mg5amcCpu
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -26993,7 +26993,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1180
-      helas_VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[14] += amp_sv[0];
@@ -27010,7 +27010,7 @@ namespace mg5amcCpu
       jamp_sv[103] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
@@ -27027,7 +27027,7 @@ namespace mg5amcCpu
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
@@ -27051,7 +27051,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1181
-      helas_VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -27068,7 +27068,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
@@ -27085,7 +27085,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      helas_VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -27102,7 +27102,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -27119,7 +27119,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -27136,7 +27136,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[15] += amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -27153,7 +27153,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -27170,7 +27170,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -27187,7 +27187,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -27208,12 +27208,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1182 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1182
-      helas_VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      helas_VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      helas_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_CD_VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_CD_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 1182
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
@@ -27230,7 +27230,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[26] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -27247,7 +27247,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -27271,7 +27271,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1183
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[24] += amp_sv[0];
@@ -27288,7 +27288,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[15] += amp_sv[0];
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -27305,7 +27305,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[24] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -27329,7 +27329,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1184
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27338,7 +27338,7 @@ namespace mg5amcCpu
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27347,7 +27347,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27363,17 +27363,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1185
-      helas_FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      helas_FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
@@ -27385,7 +27385,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1186
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27394,7 +27394,7 @@ namespace mg5amcCpu
       jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27403,7 +27403,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27419,17 +27419,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1187
-      helas_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[60] -= amp_sv[0];
       jamp_sv[84] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[50] += amp_sv[0];
       jamp_sv[60] -= amp_sv[0];
       jamp_sv[74] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[50] += amp_sv[0];
       jamp_sv[74] += amp_sv[0];
@@ -27438,25 +27438,25 @@ namespace mg5amcCpu
       // *** DIAGRAM 1188 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1188
-      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] );
-      helas_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
-      helas_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
-      helas_FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
 
       // Amplitude(s) for diagram number 1188
-      helas_FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[36] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[41] += amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[38] += amp_sv[0];
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[40] += amp_sv[0];
-      helas_FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[36] -= amp_sv[0];
       jamp_sv[38] += amp_sv[0];
       jamp_sv[40] += amp_sv[0];
@@ -27465,12 +27465,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1189 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1189
-      helas_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] );
-      helas_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] );
-      helas_VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_CD_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_CD_VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 1189
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27479,7 +27479,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27488,7 +27488,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -27504,17 +27504,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1190
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] += amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[34] -= amp_sv[0];
       jamp_sv[44] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
       jamp_sv[34] -= amp_sv[0];
       jamp_sv[42] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
       jamp_sv[42] += amp_sv[0];
@@ -27523,22 +27523,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1191 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1191
-      helas_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
-      helas_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
-      helas_FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 1191
-      helas_FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[53] += amp_sv[0];
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[53] += amp_sv[0];
       jamp_sv[99] += amp_sv[0];
@@ -27550,7 +27550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1192
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27559,7 +27559,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27568,7 +27568,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -27584,17 +27584,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1193
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] += amp_sv[0];
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[85] += amp_sv[0];
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] -= amp_sv[0];
       jamp_sv[85] += amp_sv[0];
       jamp_sv[91] += amp_sv[0];
@@ -27606,7 +27606,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1194
-      helas_FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27615,7 +27615,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -27624,7 +27624,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -27640,7 +27640,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1195
-      helas_FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27649,7 +27649,7 @@ namespace mg5amcCpu
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
@@ -27658,7 +27658,7 @@ namespace mg5amcCpu
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -27674,7 +27674,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1196
-      helas_VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[20] += amp_sv[0];
@@ -27691,7 +27691,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -27708,7 +27708,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
-      helas_VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
@@ -27732,7 +27732,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1197
-      helas_VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -27749,7 +27749,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      helas_VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
@@ -27766,7 +27766,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
       jamp_sv[25] += amp_sv[0];
@@ -27783,7 +27783,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -27800,7 +27800,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
       jamp_sv[37] += amp_sv[0];
@@ -27817,7 +27817,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[21] += amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -27834,7 +27834,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -27851,7 +27851,7 @@ namespace mg5amcCpu
       jamp_sv[94] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
       jamp_sv[36] += amp_sv[0];
@@ -27868,7 +27868,7 @@ namespace mg5amcCpu
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -27889,12 +27889,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1198 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1198
-      helas_VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] );
-      helas_VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
-      helas_VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] );
+      helas_CD_VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 1198
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
@@ -27911,7 +27911,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -27928,7 +27928,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -27952,7 +27952,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1199
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
       jamp_sv[25] += amp_sv[0];
@@ -27969,7 +27969,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[21] += amp_sv[0];
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -27986,7 +27986,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[11] += amp_sv[0];
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -28010,7 +28010,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1200
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28019,7 +28019,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28028,7 +28028,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28044,17 +28044,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1201
-      helas_FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[78] += amp_sv[0];
       jamp_sv[79] -= amp_sv[0];
       jamp_sv[81] -= amp_sv[0];
       jamp_sv[83] += amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[79] -= amp_sv[0];
       jamp_sv[80] += amp_sv[0];
       jamp_sv[81] -= amp_sv[0];
       jamp_sv[82] += amp_sv[0];
-      helas_FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[78] -= amp_sv[0];
       jamp_sv[80] += amp_sv[0];
       jamp_sv[82] += amp_sv[0];
@@ -28066,7 +28066,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1202
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28075,7 +28075,7 @@ namespace mg5amcCpu
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28084,7 +28084,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28100,17 +28100,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1203
-      helas_FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[66] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[52] += amp_sv[0];
       jamp_sv[66] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[52] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
@@ -28119,25 +28119,25 @@ namespace mg5amcCpu
       // *** DIAGRAM 1204 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1204
-      helas_VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] );
-      helas_FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
-      helas_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
-      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
 
       // Amplitude(s) for diagram number 1204
-      helas_FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[30] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[35] += amp_sv[0];
-      helas_FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[32] += amp_sv[0];
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[34] += amp_sv[0];
-      helas_FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[30] -= amp_sv[0];
       jamp_sv[32] += amp_sv[0];
       jamp_sv[34] += amp_sv[0];
@@ -28146,12 +28146,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1205 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1205
-      helas_VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      helas_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      helas_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_CD_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_CD_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 1205
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28160,7 +28160,7 @@ namespace mg5amcCpu
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28169,7 +28169,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28185,17 +28185,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1206
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] += amp_sv[0];
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[40] -= amp_sv[0];
       jamp_sv[46] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[37] += amp_sv[0];
       jamp_sv[40] -= amp_sv[0];
       jamp_sv[43] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[37] += amp_sv[0];
       jamp_sv[43] += amp_sv[0];
@@ -28204,22 +28204,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 1207 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1207
-      helas_FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
-      helas_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
 
       // Amplitude(s) for diagram number 1207
-      helas_FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[77] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[77] += amp_sv[0];
       jamp_sv[101] += amp_sv[0];
@@ -28231,7 +28231,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1208
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28240,7 +28240,7 @@ namespace mg5amcCpu
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28249,7 +28249,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28265,17 +28265,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1209
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] += amp_sv[0];
       jamp_sv[53] -= amp_sv[0];
       jamp_sv[64] -= amp_sv[0];
       jamp_sv[70] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[53] -= amp_sv[0];
       jamp_sv[61] += amp_sv[0];
       jamp_sv[64] -= amp_sv[0];
       jamp_sv[67] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] -= amp_sv[0];
       jamp_sv[61] += amp_sv[0];
       jamp_sv[67] += amp_sv[0];
@@ -28287,7 +28287,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1210
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28296,7 +28296,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -28305,7 +28305,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -28321,7 +28321,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1211
-      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28330,7 +28330,7 @@ namespace mg5amcCpu
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28339,7 +28339,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -28355,7 +28355,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1212
-      helas_VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -28372,7 +28372,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[22] += amp_sv[0];
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
@@ -28389,7 +28389,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
@@ -28413,7 +28413,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1213
-      helas_VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] += amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -28430,7 +28430,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      helas_VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
@@ -28447,7 +28447,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[27] += amp_sv[0];
@@ -28464,7 +28464,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -28481,7 +28481,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -28498,7 +28498,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[23] += amp_sv[0];
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -28515,7 +28515,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -28532,7 +28532,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -28549,7 +28549,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -28570,12 +28570,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1214 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1214
-      helas_VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] );
-      helas_VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      helas_VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] );
+      helas_CD_VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 1214
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] += amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
@@ -28592,7 +28592,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -28609,7 +28609,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
@@ -28633,7 +28633,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1215
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
       jamp_sv[27] += amp_sv[0];
@@ -28650,7 +28650,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[23] += amp_sv[0];
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[31] += amp_sv[0];
@@ -28667,7 +28667,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[17] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[30] += amp_sv[0];
@@ -28691,7 +28691,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1216
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28700,7 +28700,7 @@ namespace mg5amcCpu
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28709,7 +28709,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28725,17 +28725,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1217
-      helas_FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[54] += amp_sv[0];
       jamp_sv[55] -= amp_sv[0];
       jamp_sv[57] -= amp_sv[0];
       jamp_sv[59] += amp_sv[0];
-      helas_FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[55] -= amp_sv[0];
       jamp_sv[56] += amp_sv[0];
       jamp_sv[57] -= amp_sv[0];
       jamp_sv[58] += amp_sv[0];
-      helas_FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[54] -= amp_sv[0];
       jamp_sv[56] += amp_sv[0];
       jamp_sv[58] += amp_sv[0];
@@ -28747,7 +28747,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1218
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28756,7 +28756,7 @@ namespace mg5amcCpu
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28765,7 +28765,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
@@ -28781,17 +28781,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1219
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] += amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[76] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
@@ -28803,7 +28803,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1220
-      helas_VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28820,7 +28820,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28837,7 +28837,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -28854,7 +28854,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28871,7 +28871,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28888,7 +28888,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[19] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -28905,7 +28905,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -28922,7 +28922,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -28939,7 +28939,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[22] -= amp_sv[0];
@@ -28960,12 +28960,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1221 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1221
-      helas_VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] );
-      helas_VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] );
-      helas_VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 1221
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28982,7 +28982,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -28999,7 +28999,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -29023,7 +29023,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1222
-      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29040,7 +29040,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29057,7 +29057,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -29081,7 +29081,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1223
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29090,7 +29090,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29099,7 +29099,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29115,17 +29115,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1224
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] += amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
@@ -29137,7 +29137,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1225
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29146,7 +29146,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29155,7 +29155,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29171,17 +29171,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1226
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[32] += amp_sv[0];
       jamp_sv[38] -= amp_sv[0];
       jamp_sv[62] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[38] -= amp_sv[0];
       jamp_sv[56] += amp_sv[0];
       jamp_sv[62] -= amp_sv[0];
       jamp_sv[80] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[32] -= amp_sv[0];
       jamp_sv[56] += amp_sv[0];
       jamp_sv[80] += amp_sv[0];
@@ -29193,7 +29193,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1227
-      helas_VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29210,7 +29210,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29227,7 +29227,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -29244,7 +29244,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29261,7 +29261,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29278,7 +29278,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[13] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
@@ -29295,7 +29295,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -29312,7 +29312,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -29329,7 +29329,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -29350,12 +29350,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1228 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1228
-      helas_VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] );
-      helas_VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] );
-      helas_VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] );
 
       // Amplitude(s) for diagram number 1228
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29372,7 +29372,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29389,7 +29389,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -29413,7 +29413,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1229
-      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29430,7 +29430,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29447,7 +29447,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -29471,7 +29471,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1230
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29480,7 +29480,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29489,7 +29489,7 @@ namespace mg5amcCpu
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29505,17 +29505,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1231
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] += amp_sv[0];
       jamp_sv[73] -= amp_sv[0];
       jamp_sv[75] -= amp_sv[0];
       jamp_sv[77] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[73] -= amp_sv[0];
       jamp_sv[74] += amp_sv[0];
       jamp_sv[75] -= amp_sv[0];
       jamp_sv[76] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[72] -= amp_sv[0];
       jamp_sv[74] += amp_sv[0];
       jamp_sv[76] += amp_sv[0];
@@ -29527,7 +29527,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1232
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29536,7 +29536,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29545,7 +29545,7 @@ namespace mg5amcCpu
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29561,17 +29561,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1233
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[34] += amp_sv[0];
       jamp_sv[44] -= amp_sv[0];
       jamp_sv[68] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[44] -= amp_sv[0];
       jamp_sv[58] += amp_sv[0];
       jamp_sv[68] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[34] -= amp_sv[0];
       jamp_sv[58] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
@@ -29583,7 +29583,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1234
-      helas_VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -29600,7 +29600,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -29617,7 +29617,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -29634,7 +29634,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -29651,7 +29651,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29668,7 +29668,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[7] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[9] += amp_sv[0];
@@ -29685,7 +29685,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -29702,7 +29702,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29719,7 +29719,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
@@ -29740,12 +29740,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1235 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1235
-      helas_VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] );
-      helas_VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] );
-      helas_VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] );
 
       // Amplitude(s) for diagram number 1235
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -29762,7 +29762,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
@@ -29779,7 +29779,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -29803,7 +29803,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1236
-      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[6] -= amp_sv[0];
@@ -29820,7 +29820,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29837,7 +29837,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
@@ -29861,7 +29861,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1237
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29870,7 +29870,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29879,7 +29879,7 @@ namespace mg5amcCpu
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29895,17 +29895,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1238
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] += amp_sv[0];
       jamp_sv[49] -= amp_sv[0];
       jamp_sv[51] -= amp_sv[0];
       jamp_sv[53] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[49] -= amp_sv[0];
       jamp_sv[50] += amp_sv[0];
       jamp_sv[51] -= amp_sv[0];
       jamp_sv[52] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[48] -= amp_sv[0];
       jamp_sv[50] += amp_sv[0];
       jamp_sv[52] += amp_sv[0];
@@ -29917,7 +29917,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1239
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29926,7 +29926,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
@@ -29935,7 +29935,7 @@ namespace mg5amcCpu
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -29951,17 +29951,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1240
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[40] += amp_sv[0];
       jamp_sv[46] -= amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[46] -= amp_sv[0];
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[40] -= amp_sv[0];
       jamp_sv[82] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
diff --git a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h
index 24e8114e3a..624de4a7b3 100644
--- a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h
@@ -1404,178 +1404,347 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1_0 linker_VVV1_0
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVVV1_0 linker_VVVV1_0
-#define helas_VVVV1P0_1 linker_VVVV1P0_1
-#define helas_VVVV3_0 linker_VVVV3_0
-#define helas_VVVV3P0_1 linker_VVVV3P0_1
-#define helas_VVVV4_0 linker_VVVV4_0
-#define helas_VVVV4P0_1 linker_VVVV4P0_1
+#define helas_CD_VVV1_0 linker_CD_VVV1_0
+#define helas_CI_VVV1_0 linker_CI_VVV1_0
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVVV1_0 linker_CD_VVVV1_0
+#define helas_CI_VVVV1_0 linker_CI_VVVV1_0
+#define helas_CD_VVVV1P0_1 linker_CD_VVVV1P0_1
+#define helas_CI_VVVV1P0_1 linker_CI_VVVV1P0_1
+#define helas_CD_VVVV3_0 linker_CD_VVVV3_0
+#define helas_CI_VVVV3_0 linker_CI_VVVV3_0
+#define helas_CD_VVVV3P0_1 linker_CD_VVVV3P0_1
+#define helas_CI_VVVV3P0_1 linker_CI_VVVV3P0_1
+#define helas_CD_VVVV4_0 linker_CD_VVVV4_0
+#define helas_CI_VVVV4_0 linker_CI_VVVV4_0
+#define helas_CD_VVVV4P0_1 linker_CD_VVVV4P0_1
+#define helas_CI_VVVV4P0_1 linker_CI_VVVV4P0_1
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV3_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_VVVV4_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allV4[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
index 404d5a1549..3b48b57384 100644
--- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0058324337005615234 [0m
+[1;32mDEBUG: model prefixing  takes 0.005753278732299805 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.936 s
+1 processes with 1240 diagrams generated in 1.962 s
 Total: 1 processes with 1240 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg
 Load PLUGIN.CUDACPP_OUTPUT
@@ -178,14 +178,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.790 s
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.855 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.365 s
+ALOHA: aloha creates 5 routines in  0.364 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -210,7 +210,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
 quit
 
-real	0m13.502s
-user	0m13.358s
-sys	0m0.089s
+real	0m13.568s
+user	0m13.380s
+sys	0m0.131s
 Code generation completed in 13 seconds
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/HelAmps.cc
index 845cf9fd87..ebe42b3ce3 100644
--- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/HelAmps.cc
@@ -62,185 +62,366 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+    return VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
-    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV3_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
-    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
   __device__ void
-  linker_VVVV4_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allV4[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+  linker_CI_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc
index 4879802d7f..fbcdb34f08 100644
--- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc
@@ -338,13 +338,13 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][6], +1, w_fp[6], 6 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
-      helas_VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] );
-      helas_VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 1
-      helas_VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -368,10 +368,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 1240 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_CD_VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 2
-      helas_VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -398,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 3
-      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -418,7 +418,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -438,7 +438,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -462,11 +462,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 1240 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] );
-      helas_VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_CD_VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] );
 
       // Amplitude(s) for diagram number 4
-      helas_VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -493,7 +493,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -520,7 +520,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -540,7 +540,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -560,7 +560,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -584,10 +584,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 1240 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 7
-      helas_VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -614,7 +614,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -641,7 +641,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -661,7 +661,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -681,7 +681,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -705,12 +705,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 1240 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
-      helas_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] );
-      helas_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
+      helas_CD_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 10
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -730,7 +730,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -750,7 +750,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -774,12 +774,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 1240 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] );
-      helas_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] );
-      helas_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] );
+      helas_CD_VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] );
+      helas_CD_VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] );
+      helas_CD_VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 11
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -799,7 +799,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -819,7 +819,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -843,12 +843,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 1240 ***
 
       // Wavefunction(s) for diagram number 12
-      helas_VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      helas_VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      helas_VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 12
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -868,7 +868,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -888,7 +888,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -912,10 +912,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 1240 ***
 
       // Wavefunction(s) for diagram number 13
-      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 13
-      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -935,7 +935,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -955,7 +955,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -979,10 +979,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 1240 ***
 
       // Wavefunction(s) for diagram number 14
-      helas_VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 14
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1006,10 +1006,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 15 OF 1240 ***
 
       // Wavefunction(s) for diagram number 15
-      helas_VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] );
 
       // Amplitude(s) for diagram number 15
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1036,7 +1036,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1060,10 +1060,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 1240 ***
 
       // Wavefunction(s) for diagram number 17
-      helas_VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] );
 
       // Amplitude(s) for diagram number 17
-      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1083,7 +1083,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1103,7 +1103,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1130,7 +1130,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1154,10 +1154,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 1240 ***
 
       // Wavefunction(s) for diagram number 19
-      helas_VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] );
 
       // Amplitude(s) for diagram number 19
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1184,7 +1184,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1208,10 +1208,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 1240 ***
 
       // Wavefunction(s) for diagram number 21
-      helas_VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_CD_VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
 
       // Amplitude(s) for diagram number 21
-      helas_VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1231,7 +1231,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1251,7 +1251,7 @@ namespace mg5amcCpu
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1278,7 +1278,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1305,7 +1305,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1329,10 +1329,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 1240 ***
 
       // Wavefunction(s) for diagram number 24
-      helas_VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_CD_VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 24
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1356,12 +1356,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 25 OF 1240 ***
 
       // Wavefunction(s) for diagram number 25
-      helas_VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] );
-      helas_VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] );
-      helas_VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] );
+      helas_CD_VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] );
+      helas_CD_VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] );
+      helas_CD_VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] );
 
       // Amplitude(s) for diagram number 25
-      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1381,7 +1381,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1401,7 +1401,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1425,12 +1425,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 1240 ***
 
       // Wavefunction(s) for diagram number 26
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] );
-      helas_FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      helas_FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] );
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1440,10 +1440,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 1240 ***
 
       // Wavefunction(s) for diagram number 27
-      helas_FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] );
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1453,10 +1453,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 1240 ***
 
       // Wavefunction(s) for diagram number 28
-      helas_FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] );
 
       // Amplitude(s) for diagram number 28
-      helas_VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1475,7 +1475,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1490,7 +1490,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      helas_VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1509,7 +1509,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 31
-      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1524,7 +1524,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1536,7 +1536,7 @@ namespace mg5amcCpu
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1548,7 +1548,7 @@ namespace mg5amcCpu
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1564,11 +1564,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 33 OF 1240 ***
 
       // Wavefunction(s) for diagram number 33
-      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] );
-      helas_FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1578,10 +1578,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 34 OF 1240 ***
 
       // Wavefunction(s) for diagram number 34
-      helas_FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
 
       // Amplitude(s) for diagram number 34
-      helas_FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1594,7 +1594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1606,10 +1606,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 36 OF 1240 ***
 
       // Wavefunction(s) for diagram number 36
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] );
 
       // Amplitude(s) for diagram number 36
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1619,10 +1619,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 37 OF 1240 ***
 
       // Wavefunction(s) for diagram number 37
-      helas_FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] );
 
       // Amplitude(s) for diagram number 37
-      helas_FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1635,7 +1635,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1650,7 +1650,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1665,7 +1665,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      helas_FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1680,7 +1680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 41
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1696,11 +1696,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 42 OF 1240 ***
 
       // Wavefunction(s) for diagram number 42
-      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
-      helas_FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] );
 
       // Amplitude(s) for diagram number 42
-      helas_FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1710,10 +1710,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 43 OF 1240 ***
 
       // Wavefunction(s) for diagram number 43
-      helas_FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] );
 
       // Amplitude(s) for diagram number 43
-      helas_FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1723,10 +1723,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 44 OF 1240 ***
 
       // Wavefunction(s) for diagram number 44
-      helas_FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] );
 
       // Amplitude(s) for diagram number 44
-      helas_VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1745,7 +1745,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1760,7 +1760,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      helas_VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1779,7 +1779,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 47
-      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1794,7 +1794,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1806,7 +1806,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1818,7 +1818,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1834,11 +1834,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 49 OF 1240 ***
 
       // Wavefunction(s) for diagram number 49
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] );
-      helas_FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
 
       // Amplitude(s) for diagram number 49
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1848,10 +1848,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 50 OF 1240 ***
 
       // Wavefunction(s) for diagram number 50
-      helas_FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
 
       // Amplitude(s) for diagram number 50
-      helas_FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1864,7 +1864,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 51
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1879,7 +1879,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 52
-      helas_FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1892,7 +1892,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      helas_FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1905,7 +1905,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 54
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1920,7 +1920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 55
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1935,7 +1935,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 56
-      helas_FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1950,7 +1950,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1966,11 +1966,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 58 OF 1240 ***
 
       // Wavefunction(s) for diagram number 58
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
-      helas_FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] );
 
       // Amplitude(s) for diagram number 58
-      helas_FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1980,10 +1980,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 1240 ***
 
       // Wavefunction(s) for diagram number 59
-      helas_FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] );
 
       // Amplitude(s) for diagram number 59
-      helas_FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1993,10 +1993,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 60 OF 1240 ***
 
       // Wavefunction(s) for diagram number 60
-      helas_FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] );
 
       // Amplitude(s) for diagram number 60
-      helas_VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2015,7 +2015,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2030,7 +2030,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      helas_VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2049,7 +2049,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 63
-      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2064,7 +2064,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2076,7 +2076,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2088,7 +2088,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2104,10 +2104,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 65 OF 1240 ***
 
       // Wavefunction(s) for diagram number 65
-      helas_FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
 
       // Amplitude(s) for diagram number 65
-      helas_FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2120,7 +2120,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 66
-      helas_FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2133,7 +2133,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2148,7 +2148,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 68
-      helas_FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2161,7 +2161,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      helas_FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2174,7 +2174,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2189,7 +2189,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2204,7 +2204,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      helas_FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2219,7 +2219,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 73
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2235,11 +2235,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 74 OF 1240 ***
 
       // Wavefunction(s) for diagram number 74
-      helas_FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
-      helas_FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 74
-      helas_FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2249,10 +2249,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 75 OF 1240 ***
 
       // Wavefunction(s) for diagram number 75
-      helas_FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] );
 
       // Amplitude(s) for diagram number 75
-      helas_FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2262,10 +2262,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 76 OF 1240 ***
 
       // Wavefunction(s) for diagram number 76
-      helas_FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] );
+      helas_CD_FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] );
 
       // Amplitude(s) for diagram number 76
-      helas_VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2284,7 +2284,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 77
-      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2299,7 +2299,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 78
-      helas_VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2318,7 +2318,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 79
-      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2333,7 +2333,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 80
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2345,7 +2345,7 @@ namespace mg5amcCpu
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2357,7 +2357,7 @@ namespace mg5amcCpu
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2376,7 +2376,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 81
-      helas_FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2391,7 +2391,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 82
-      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2406,7 +2406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 83
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2422,10 +2422,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 84 OF 1240 ***
 
       // Wavefunction(s) for diagram number 84
-      helas_FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] );
 
       // Amplitude(s) for diagram number 84
-      helas_FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2435,10 +2435,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 85 OF 1240 ***
 
       // Wavefunction(s) for diagram number 85
-      helas_FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] );
 
       // Amplitude(s) for diagram number 85
-      helas_FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2448,10 +2448,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 86 OF 1240 ***
 
       // Wavefunction(s) for diagram number 86
-      helas_FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] );
+      helas_CD_FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 86
-      helas_VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2470,7 +2470,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 87
-      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2485,7 +2485,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 88
-      helas_VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2504,7 +2504,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 89
-      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2519,7 +2519,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 90
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2531,7 +2531,7 @@ namespace mg5amcCpu
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2543,7 +2543,7 @@ namespace mg5amcCpu
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2562,7 +2562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 91
-      helas_FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2577,7 +2577,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 92
-      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2592,7 +2592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 93
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2608,10 +2608,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 94 OF 1240 ***
 
       // Wavefunction(s) for diagram number 94
-      helas_FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] );
 
       // Amplitude(s) for diagram number 94
-      helas_FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2621,10 +2621,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 95 OF 1240 ***
 
       // Wavefunction(s) for diagram number 95
-      helas_FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] );
 
       // Amplitude(s) for diagram number 95
-      helas_FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2634,10 +2634,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 96 OF 1240 ***
 
       // Wavefunction(s) for diagram number 96
-      helas_FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] );
+      helas_CD_FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 96
-      helas_VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2656,7 +2656,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 97
-      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2671,7 +2671,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 98
-      helas_VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2690,7 +2690,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 99
-      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2705,7 +2705,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 100
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2717,7 +2717,7 @@ namespace mg5amcCpu
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2729,7 +2729,7 @@ namespace mg5amcCpu
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2748,7 +2748,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 101
-      helas_FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2763,7 +2763,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 102
-      helas_FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2778,7 +2778,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 103
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2794,10 +2794,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 104 OF 1240 ***
 
       // Wavefunction(s) for diagram number 104
-      helas_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] );
 
       // Amplitude(s) for diagram number 104
-      helas_FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2809,10 +2809,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 105 OF 1240 ***
 
       // Wavefunction(s) for diagram number 105
-      helas_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] );
+      helas_CD_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] );
 
       // Amplitude(s) for diagram number 105
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2828,10 +2828,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 106 OF 1240 ***
 
       // Wavefunction(s) for diagram number 106
-      helas_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
 
       // Amplitude(s) for diagram number 106
-      helas_FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2846,7 +2846,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 107
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2865,7 +2865,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 108
-      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2884,7 +2884,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 109
-      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2900,10 +2900,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 110 OF 1240 ***
 
       // Wavefunction(s) for diagram number 110
-      helas_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 110
-      helas_FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2915,10 +2915,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 111 OF 1240 ***
 
       // Wavefunction(s) for diagram number 111
-      helas_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 111
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2934,10 +2934,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 112 OF 1240 ***
 
       // Wavefunction(s) for diagram number 112
-      helas_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 112
-      helas_FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2952,7 +2952,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 113
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2971,7 +2971,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 114
-      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -2990,7 +2990,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 115
-      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3006,10 +3006,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 116 OF 1240 ***
 
       // Wavefunction(s) for diagram number 116
-      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 116
-      helas_FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3021,10 +3021,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 117 OF 1240 ***
 
       // Wavefunction(s) for diagram number 117
-      helas_VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] );
 
       // Amplitude(s) for diagram number 117
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3040,10 +3040,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 118 OF 1240 ***
 
       // Wavefunction(s) for diagram number 118
-      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] );
 
       // Amplitude(s) for diagram number 118
-      helas_FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3058,7 +3058,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 119
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3077,7 +3077,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 120
-      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3096,7 +3096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 121
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3115,7 +3115,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 122
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3127,7 +3127,7 @@ namespace mg5amcCpu
       jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3139,7 +3139,7 @@ namespace mg5amcCpu
       jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3158,7 +3158,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 123
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3170,7 +3170,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3182,7 +3182,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3198,13 +3198,13 @@ namespace mg5amcCpu
       // *** DIAGRAM 124 OF 1240 ***
 
       // Wavefunction(s) for diagram number 124
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
-      helas_FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      helas_FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 124
-      helas_FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3213,10 +3213,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 125 OF 1240 ***
 
       // Wavefunction(s) for diagram number 125
-      helas_FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 125
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3225,11 +3225,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 126 OF 1240 ***
 
       // Wavefunction(s) for diagram number 126
-      helas_FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] );
-      helas_FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] );
 
       // Amplitude(s) for diagram number 126
-      helas_FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3241,7 +3241,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 127
-      helas_FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3250,10 +3250,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 128 OF 1240 ***
 
       // Wavefunction(s) for diagram number 128
-      helas_FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] );
 
       // Amplitude(s) for diagram number 128
-      helas_FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3265,7 +3265,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 129
-      helas_FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3274,10 +3274,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 130 OF 1240 ***
 
       // Wavefunction(s) for diagram number 130
-      helas_FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] );
+      helas_CD_FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] );
 
       // Amplitude(s) for diagram number 130
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3289,10 +3289,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 131 OF 1240 ***
 
       // Wavefunction(s) for diagram number 131
-      helas_FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
 
       // Amplitude(s) for diagram number 131
-      helas_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3305,7 +3305,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 132
-      helas_FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3318,7 +3318,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 133
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3330,10 +3330,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 134 OF 1240 ***
 
       // Wavefunction(s) for diagram number 134
-      helas_FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 134
-      helas_FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3346,7 +3346,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 135
-      helas_FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3359,7 +3359,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 136
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3374,7 +3374,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 137
-      helas_FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3384,10 +3384,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 138 OF 1240 ***
 
       // Wavefunction(s) for diagram number 138
-      helas_FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
 
       // Amplitude(s) for diagram number 138
-      helas_FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3400,7 +3400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 139
-      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3408,7 +3408,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3416,7 +3416,7 @@ namespace mg5amcCpu
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3428,12 +3428,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 140 OF 1240 ***
 
       // Wavefunction(s) for diagram number 140
-      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] );
-      helas_FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] );
-      helas_VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] );
+      helas_CD_VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] );
 
       // Amplitude(s) for diagram number 140
-      helas_VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3449,10 +3449,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 141 OF 1240 ***
 
       // Wavefunction(s) for diagram number 141
-      helas_VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] );
+      helas_CD_VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] );
 
       // Amplitude(s) for diagram number 141
-      helas_VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3471,7 +3471,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 142
-      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3483,7 +3483,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3495,7 +3495,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3511,10 +3511,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 143 OF 1240 ***
 
       // Wavefunction(s) for diagram number 143
-      helas_FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] );
 
       // Amplitude(s) for diagram number 143
-      helas_FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3527,7 +3527,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 144
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3542,7 +3542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 145
-      helas_FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3555,7 +3555,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 146
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3567,10 +3567,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 147 OF 1240 ***
 
       // Wavefunction(s) for diagram number 147
-      helas_FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
 
       // Amplitude(s) for diagram number 147
-      helas_FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3580,10 +3580,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 148 OF 1240 ***
 
       // Wavefunction(s) for diagram number 148
-      helas_FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] );
+      helas_CD_FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] );
 
       // Amplitude(s) for diagram number 148
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3598,7 +3598,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 149
-      helas_FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3611,7 +3611,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 150
-      helas_FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3621,10 +3621,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 151 OF 1240 ***
 
       // Wavefunction(s) for diagram number 151
-      helas_FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] );
+      helas_CD_FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 151
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3639,7 +3639,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 152
-      helas_FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3652,7 +3652,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 153
-      helas_FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3667,7 +3667,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 154
-      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3686,7 +3686,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 155
-      helas_FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3698,11 +3698,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 156 OF 1240 ***
 
       // Wavefunction(s) for diagram number 156
-      helas_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] );
-      helas_VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] );
+      helas_CD_VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] );
 
       // Amplitude(s) for diagram number 156
-      helas_VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3718,10 +3718,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 157 OF 1240 ***
 
       // Wavefunction(s) for diagram number 157
-      helas_VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] );
+      helas_CD_VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] );
 
       // Amplitude(s) for diagram number 157
-      helas_VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3740,7 +3740,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 158
-      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3752,7 +3752,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3764,7 +3764,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3780,10 +3780,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 159 OF 1240 ***
 
       // Wavefunction(s) for diagram number 159
-      helas_FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
 
       // Amplitude(s) for diagram number 159
-      helas_FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3796,7 +3796,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 160
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3811,7 +3811,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 161
-      helas_FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3824,7 +3824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 162
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3836,10 +3836,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 163 OF 1240 ***
 
       // Wavefunction(s) for diagram number 163
-      helas_FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
 
       // Amplitude(s) for diagram number 163
-      helas_FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3849,10 +3849,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 164 OF 1240 ***
 
       // Wavefunction(s) for diagram number 164
-      helas_FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] );
+      helas_CD_FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] );
 
       // Amplitude(s) for diagram number 164
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3867,7 +3867,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 165
-      helas_FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3880,7 +3880,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 166
-      helas_FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3893,7 +3893,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 167
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3908,7 +3908,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 168
-      helas_FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3921,7 +3921,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 169
-      helas_FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3936,7 +3936,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 170
-      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3955,7 +3955,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 171
-      helas_FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3967,11 +3967,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 172 OF 1240 ***
 
       // Wavefunction(s) for diagram number 172
-      helas_VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      helas_VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_CD_VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] );
 
       // Amplitude(s) for diagram number 172
-      helas_VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -3987,10 +3987,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 173 OF 1240 ***
 
       // Wavefunction(s) for diagram number 173
-      helas_VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] );
+      helas_CD_VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] );
 
       // Amplitude(s) for diagram number 173
-      helas_VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4009,7 +4009,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 174
-      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4021,7 +4021,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4033,7 +4033,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4049,10 +4049,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 175 OF 1240 ***
 
       // Wavefunction(s) for diagram number 175
-      helas_FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] );
 
       // Amplitude(s) for diagram number 175
-      helas_FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4065,7 +4065,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 176
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4080,7 +4080,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 177
-      helas_FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4093,7 +4093,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 178
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4105,10 +4105,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 179 OF 1240 ***
 
       // Wavefunction(s) for diagram number 179
-      helas_FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
 
       // Amplitude(s) for diagram number 179
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4121,7 +4121,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 180
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4136,7 +4136,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 181
-      helas_FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4149,7 +4149,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 182
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4162,7 +4162,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 183
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4177,7 +4177,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 184
-      helas_FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4190,7 +4190,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 185
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4205,7 +4205,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 186
-      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4224,7 +4224,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 187
-      helas_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4236,10 +4236,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 188 OF 1240 ***
 
       // Wavefunction(s) for diagram number 188
-      helas_FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_CD_FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
 
       // Amplitude(s) for diagram number 188
-      helas_FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4251,7 +4251,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 189
-      helas_FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4260,10 +4260,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 190 OF 1240 ***
 
       // Wavefunction(s) for diagram number 190
-      helas_FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] );
 
       // Amplitude(s) for diagram number 190
-      helas_FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4275,7 +4275,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 191
-      helas_FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4287,7 +4287,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 192
-      helas_FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4299,7 +4299,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 193
-      helas_FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4311,7 +4311,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 194
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4324,7 +4324,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 195
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4339,7 +4339,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 196
-      helas_FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4352,7 +4352,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 197
-      helas_FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4364,7 +4364,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 198
-      helas_FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4373,10 +4373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 199 OF 1240 ***
 
       // Wavefunction(s) for diagram number 199
-      helas_FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] );
 
       // Amplitude(s) for diagram number 199
-      helas_FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4388,7 +4388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 200
-      helas_FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4400,7 +4400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 201
-      helas_FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4412,7 +4412,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 202
-      helas_FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4424,7 +4424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 203
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4437,7 +4437,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 204
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4452,7 +4452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 205
-      helas_FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4465,7 +4465,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 206
-      helas_FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4477,7 +4477,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 207
-      helas_FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4486,10 +4486,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 208 OF 1240 ***
 
       // Wavefunction(s) for diagram number 208
-      helas_FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 208
-      helas_FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4501,7 +4501,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 209
-      helas_FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4513,7 +4513,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 210
-      helas_FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4525,7 +4525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 211
-      helas_FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4537,7 +4537,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 212
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4550,7 +4550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 213
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4565,7 +4565,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 214
-      helas_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4578,7 +4578,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 215
-      helas_FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4591,7 +4591,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 216
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4603,10 +4603,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 217 OF 1240 ***
 
       // Wavefunction(s) for diagram number 217
-      helas_VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] );
 
       // Amplitude(s) for diagram number 217
-      helas_VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4625,7 +4625,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 218
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4644,7 +4644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 219
-      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4656,7 +4656,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4668,7 +4668,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4687,7 +4687,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 220
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4702,7 +4702,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 221
-      helas_FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4715,7 +4715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 222
-      helas_FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4728,7 +4728,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 223
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4740,10 +4740,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 224 OF 1240 ***
 
       // Wavefunction(s) for diagram number 224
-      helas_VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 224
-      helas_VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4762,7 +4762,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 225
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4781,7 +4781,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 226
-      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4793,7 +4793,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4805,7 +4805,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4824,7 +4824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 227
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4839,7 +4839,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 228
-      helas_FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4852,7 +4852,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 229
-      helas_FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4865,7 +4865,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 230
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4877,10 +4877,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 231 OF 1240 ***
 
       // Wavefunction(s) for diagram number 231
-      helas_VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] );
 
       // Amplitude(s) for diagram number 231
-      helas_VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4899,7 +4899,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 232
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4918,7 +4918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 233
-      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4930,7 +4930,7 @@ namespace mg5amcCpu
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4942,7 +4942,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4961,7 +4961,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 234
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4976,7 +4976,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 235
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -4986,12 +4986,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 236 OF 1240 ***
 
       // Wavefunction(s) for diagram number 236
-      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] );
-      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] );
-      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] );
+      helas_CD_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] );
+      helas_CD_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] );
+      helas_CD_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] );
 
       // Amplitude(s) for diagram number 236
-      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5003,7 +5003,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5015,7 +5015,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5034,7 +5034,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 237
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5042,7 +5042,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5050,7 +5050,7 @@ namespace mg5amcCpu
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5065,7 +5065,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 238
-      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5073,7 +5073,7 @@ namespace mg5amcCpu
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5081,7 +5081,7 @@ namespace mg5amcCpu
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5093,12 +5093,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 239 OF 1240 ***
 
       // Wavefunction(s) for diagram number 239
-      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] );
-      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] );
-      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] );
+      helas_CD_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] );
+      helas_CD_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] );
+      helas_CD_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] );
 
       // Amplitude(s) for diagram number 239
-      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5110,7 +5110,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5122,7 +5122,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5141,7 +5141,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 240
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5149,7 +5149,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5157,7 +5157,7 @@ namespace mg5amcCpu
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5172,7 +5172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 241
-      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5180,7 +5180,7 @@ namespace mg5amcCpu
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5188,7 +5188,7 @@ namespace mg5amcCpu
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5200,12 +5200,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 242 OF 1240 ***
 
       // Wavefunction(s) for diagram number 242
-      helas_VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] );
-      helas_VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] );
-      helas_VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] );
+      helas_CD_VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] );
+      helas_CD_VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] );
+      helas_CD_VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] );
 
       // Amplitude(s) for diagram number 242
-      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5217,7 +5217,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5229,7 +5229,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5248,7 +5248,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 243
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5256,7 +5256,7 @@ namespace mg5amcCpu
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5264,7 +5264,7 @@ namespace mg5amcCpu
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5279,7 +5279,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 244
-      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5287,7 +5287,7 @@ namespace mg5amcCpu
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5295,7 +5295,7 @@ namespace mg5amcCpu
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5310,7 +5310,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 245
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5318,7 +5318,7 @@ namespace mg5amcCpu
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5326,7 +5326,7 @@ namespace mg5amcCpu
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5341,7 +5341,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 246
-      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5353,7 +5353,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5365,7 +5365,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5381,13 +5381,13 @@ namespace mg5amcCpu
       // *** DIAGRAM 247 OF 1240 ***
 
       // Wavefunction(s) for diagram number 247
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
-      helas_FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
-      helas_FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 247
-      helas_FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5396,10 +5396,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 248 OF 1240 ***
 
       // Wavefunction(s) for diagram number 248
-      helas_FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] );
 
       // Amplitude(s) for diagram number 248
-      helas_FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5408,11 +5408,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 249 OF 1240 ***
 
       // Wavefunction(s) for diagram number 249
-      helas_FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
-      helas_FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] );
 
       // Amplitude(s) for diagram number 249
-      helas_FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5424,7 +5424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 250
-      helas_FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5433,10 +5433,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 251 OF 1240 ***
 
       // Wavefunction(s) for diagram number 251
-      helas_FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
 
       // Amplitude(s) for diagram number 251
-      helas_FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5448,7 +5448,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 252
-      helas_FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5457,10 +5457,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 253 OF 1240 ***
 
       // Wavefunction(s) for diagram number 253
-      helas_FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] );
+      helas_CD_FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] );
 
       // Amplitude(s) for diagram number 253
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5472,10 +5472,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 254 OF 1240 ***
 
       // Wavefunction(s) for diagram number 254
-      helas_FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 254
-      helas_FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5488,7 +5488,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 255
-      helas_FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5501,7 +5501,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 256
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5513,10 +5513,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 257 OF 1240 ***
 
       // Wavefunction(s) for diagram number 257
-      helas_FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
 
       // Amplitude(s) for diagram number 257
-      helas_FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5529,7 +5529,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 258
-      helas_FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5542,7 +5542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 259
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5557,7 +5557,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 260
-      helas_FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5567,10 +5567,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 261 OF 1240 ***
 
       // Wavefunction(s) for diagram number 261
-      helas_FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
 
       // Amplitude(s) for diagram number 261
-      helas_FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5583,7 +5583,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 262
-      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5591,7 +5591,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= amp_sv[0];
       jamp_sv[41] -= amp_sv[0];
       jamp_sv[47] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5599,7 +5599,7 @@ namespace mg5amcCpu
       jamp_sv[39] += amp_sv[0];
       jamp_sv[41] -= amp_sv[0];
       jamp_sv[45] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5611,10 +5611,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 263 OF 1240 ***
 
       // Wavefunction(s) for diagram number 263
-      helas_FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] );
+      helas_CD_FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] );
 
       // Amplitude(s) for diagram number 263
-      helas_VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5633,7 +5633,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 264
-      helas_VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5652,7 +5652,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 265
-      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5664,7 +5664,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5676,7 +5676,7 @@ namespace mg5amcCpu
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5692,10 +5692,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 266 OF 1240 ***
 
       // Wavefunction(s) for diagram number 266
-      helas_FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] );
 
       // Amplitude(s) for diagram number 266
-      helas_FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5708,7 +5708,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 267
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5723,7 +5723,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 268
-      helas_FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5736,7 +5736,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 269
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5748,10 +5748,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 270 OF 1240 ***
 
       // Wavefunction(s) for diagram number 270
-      helas_FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
 
       // Amplitude(s) for diagram number 270
-      helas_FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5761,10 +5761,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 271 OF 1240 ***
 
       // Wavefunction(s) for diagram number 271
-      helas_FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] );
+      helas_CD_FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] );
 
       // Amplitude(s) for diagram number 271
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5779,7 +5779,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 272
-      helas_FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5792,7 +5792,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 273
-      helas_FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5802,10 +5802,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 274 OF 1240 ***
 
       // Wavefunction(s) for diagram number 274
-      helas_FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] );
+      helas_CD_FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 274
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5820,7 +5820,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 275
-      helas_FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5833,7 +5833,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 276
-      helas_FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5848,7 +5848,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 277
-      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5867,7 +5867,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 278
-      helas_FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5882,7 +5882,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 279
-      helas_VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5901,7 +5901,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 280
-      helas_VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5920,7 +5920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 281
-      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5932,7 +5932,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5944,7 +5944,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5960,10 +5960,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 282 OF 1240 ***
 
       // Wavefunction(s) for diagram number 282
-      helas_FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] );
 
       // Amplitude(s) for diagram number 282
-      helas_FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5976,7 +5976,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 283
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -5991,7 +5991,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 284
-      helas_FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6004,7 +6004,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 285
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6016,10 +6016,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 286 OF 1240 ***
 
       // Wavefunction(s) for diagram number 286
-      helas_FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
 
       // Amplitude(s) for diagram number 286
-      helas_FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6029,10 +6029,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 287 OF 1240 ***
 
       // Wavefunction(s) for diagram number 287
-      helas_FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] );
+      helas_CD_FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] );
 
       // Amplitude(s) for diagram number 287
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6047,7 +6047,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 288
-      helas_FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6060,7 +6060,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 289
-      helas_FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6073,7 +6073,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 290
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6088,7 +6088,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 291
-      helas_FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6101,7 +6101,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 292
-      helas_FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6116,7 +6116,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 293
-      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6135,7 +6135,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 294
-      helas_FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6150,7 +6150,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 295
-      helas_VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6169,7 +6169,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 296
-      helas_VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6188,7 +6188,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 297
-      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6200,7 +6200,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6212,7 +6212,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6228,10 +6228,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 298 OF 1240 ***
 
       // Wavefunction(s) for diagram number 298
-      helas_FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] );
 
       // Amplitude(s) for diagram number 298
-      helas_FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6244,7 +6244,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 299
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6259,7 +6259,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 300
-      helas_FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6272,7 +6272,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 301
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6284,10 +6284,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 302 OF 1240 ***
 
       // Wavefunction(s) for diagram number 302
-      helas_FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 302
-      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6300,7 +6300,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 303
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6315,7 +6315,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 304
-      helas_FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6328,7 +6328,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 305
-      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6341,7 +6341,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 306
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6356,7 +6356,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 307
-      helas_FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6369,7 +6369,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 308
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6384,7 +6384,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 309
-      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6403,7 +6403,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 310
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6415,10 +6415,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 311 OF 1240 ***
 
       // Wavefunction(s) for diagram number 311
-      helas_FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 311
-      helas_FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6430,7 +6430,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 312
-      helas_FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6439,10 +6439,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 313 OF 1240 ***
 
       // Wavefunction(s) for diagram number 313
-      helas_FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] );
 
       // Amplitude(s) for diagram number 313
-      helas_FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6454,7 +6454,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 314
-      helas_FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6466,7 +6466,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 315
-      helas_FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6478,7 +6478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 316
-      helas_FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6490,7 +6490,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 317
-      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6503,7 +6503,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 318
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6518,7 +6518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 319
-      helas_FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6531,7 +6531,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 320
-      helas_FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6543,7 +6543,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 321
-      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6552,10 +6552,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 322 OF 1240 ***
 
       // Wavefunction(s) for diagram number 322
-      helas_FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] );
 
       // Amplitude(s) for diagram number 322
-      helas_FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6567,7 +6567,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 323
-      helas_FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6579,7 +6579,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 324
-      helas_FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6591,7 +6591,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 325
-      helas_FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6603,7 +6603,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 326
-      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6616,7 +6616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 327
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6631,7 +6631,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 328
-      helas_FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6644,7 +6644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 329
-      helas_FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6656,7 +6656,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 330
-      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6665,10 +6665,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 331 OF 1240 ***
 
       // Wavefunction(s) for diagram number 331
-      helas_FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] );
 
       // Amplitude(s) for diagram number 331
-      helas_FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6680,7 +6680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 332
-      helas_FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6692,7 +6692,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 333
-      helas_FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6704,7 +6704,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 334
-      helas_FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6716,7 +6716,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 335
-      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6729,7 +6729,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 336
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6744,7 +6744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 337
-      helas_FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6757,7 +6757,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 338
-      helas_FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6770,7 +6770,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 339
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6785,7 +6785,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 340
-      helas_VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6804,7 +6804,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 341
-      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6823,7 +6823,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 342
-      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6835,7 +6835,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6847,7 +6847,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6866,7 +6866,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 343
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6881,7 +6881,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 344
-      helas_FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6894,7 +6894,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 345
-      helas_FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6907,7 +6907,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 346
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6922,7 +6922,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 347
-      helas_VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6941,7 +6941,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 348
-      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6960,7 +6960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 349
-      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6972,7 +6972,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -6984,7 +6984,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7003,7 +7003,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 350
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7018,7 +7018,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 351
-      helas_FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7031,7 +7031,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 352
-      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7044,7 +7044,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 353
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7059,7 +7059,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 354
-      helas_VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7078,7 +7078,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 355
-      helas_VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7097,7 +7097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 356
-      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7109,7 +7109,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7121,7 +7121,7 @@ namespace mg5amcCpu
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7140,7 +7140,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 357
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7155,7 +7155,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 358
-      helas_FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7168,7 +7168,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 359
-      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7180,7 +7180,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7192,7 +7192,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7211,7 +7211,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 360
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7219,7 +7219,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[87] += amp_sv[0];
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7227,7 +7227,7 @@ namespace mg5amcCpu
       jamp_sv[57] += amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[81] += amp_sv[0];
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7242,7 +7242,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 361
-      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7250,7 +7250,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7258,7 +7258,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7273,7 +7273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 362
-      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7285,7 +7285,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7297,7 +7297,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7316,7 +7316,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 363
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7324,7 +7324,7 @@ namespace mg5amcCpu
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7332,7 +7332,7 @@ namespace mg5amcCpu
       jamp_sv[59] += amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7347,7 +7347,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 364
-      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7355,7 +7355,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7363,7 +7363,7 @@ namespace mg5amcCpu
       jamp_sv[87] += amp_sv[0];
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7378,7 +7378,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 365
-      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7390,7 +7390,7 @@ namespace mg5amcCpu
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7402,7 +7402,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7421,7 +7421,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 366
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7429,7 +7429,7 @@ namespace mg5amcCpu
       jamp_sv[47] -= amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7437,7 +7437,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      helas_FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7452,7 +7452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 367
-      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7460,7 +7460,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= amp_sv[0];
       jamp_sv[65] -= amp_sv[0];
       jamp_sv[71] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7468,7 +7468,7 @@ namespace mg5amcCpu
       jamp_sv[63] += amp_sv[0];
       jamp_sv[65] -= amp_sv[0];
       jamp_sv[69] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7483,7 +7483,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 368
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7491,7 +7491,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7499,7 +7499,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7514,7 +7514,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 369
-      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7526,7 +7526,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7538,7 +7538,7 @@ namespace mg5amcCpu
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7554,11 +7554,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 370 OF 1240 ***
 
       // Wavefunction(s) for diagram number 370
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 370
-      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7571,7 +7571,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 371
-      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7581,11 +7581,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 372 OF 1240 ***
 
       // Wavefunction(s) for diagram number 372
-      helas_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] );
-      helas_FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] );
 
       // Amplitude(s) for diagram number 372
-      helas_VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7604,7 +7604,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 373
-      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7616,10 +7616,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 374 OF 1240 ***
 
       // Wavefunction(s) for diagram number 374
-      helas_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 374
-      helas_VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7638,7 +7638,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 375
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7650,12 +7650,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 376 OF 1240 ***
 
       // Wavefunction(s) for diagram number 376
-      helas_VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 376
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7667,7 +7667,7 @@ namespace mg5amcCpu
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7679,7 +7679,7 @@ namespace mg5amcCpu
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7695,10 +7695,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 377 OF 1240 ***
 
       // Wavefunction(s) for diagram number 377
-      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] );
 
       // Amplitude(s) for diagram number 377
-      helas_FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7708,10 +7708,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 378 OF 1240 ***
 
       // Wavefunction(s) for diagram number 378
-      helas_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 378
-      helas_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7724,7 +7724,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 379
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7739,7 +7739,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 380
-      helas_FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7749,10 +7749,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 381 OF 1240 ***
 
       // Wavefunction(s) for diagram number 381
-      helas_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] );
 
       // Amplitude(s) for diagram number 381
-      helas_FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7765,7 +7765,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 382
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7780,7 +7780,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 383
-      helas_FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7795,7 +7795,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 384
-      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7807,10 +7807,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 385 OF 1240 ***
 
       // Wavefunction(s) for diagram number 385
-      helas_VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] );
 
       // Amplitude(s) for diagram number 385
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7826,10 +7826,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 386 OF 1240 ***
 
       // Wavefunction(s) for diagram number 386
-      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 386
-      helas_FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7842,7 +7842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 387
-      helas_FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7852,10 +7852,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 388 OF 1240 ***
 
       // Wavefunction(s) for diagram number 388
-      helas_FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] );
+      helas_CD_FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] );
 
       // Amplitude(s) for diagram number 388
-      helas_VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7874,7 +7874,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 389
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7889,7 +7889,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 390
-      helas_VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7908,7 +7908,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 391
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7923,7 +7923,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 392
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7935,7 +7935,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7947,7 +7947,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7963,10 +7963,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 393 OF 1240 ***
 
       // Wavefunction(s) for diagram number 393
-      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 393
-      helas_FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7976,10 +7976,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 394 OF 1240 ***
 
       // Wavefunction(s) for diagram number 394
-      helas_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] );
 
       // Amplitude(s) for diagram number 394
-      helas_FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -7992,7 +7992,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 395
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8007,7 +8007,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 396
-      helas_FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8017,10 +8017,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 397 OF 1240 ***
 
       // Wavefunction(s) for diagram number 397
-      helas_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 397
-      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8033,7 +8033,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 398
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8048,7 +8048,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 399
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8063,7 +8063,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 400
-      helas_FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8078,7 +8078,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 401
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8097,7 +8097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 402
-      helas_FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8112,7 +8112,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 403
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8131,7 +8131,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 404
-      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8146,7 +8146,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 405
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8165,7 +8165,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 406
-      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8184,7 +8184,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 407
-      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8203,7 +8203,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 408
-      helas_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8223,7 +8223,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8243,7 +8243,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8267,10 +8267,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 409 OF 1240 ***
 
       // Wavefunction(s) for diagram number 409
-      helas_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 409
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8294,10 +8294,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 410 OF 1240 ***
 
       // Wavefunction(s) for diagram number 410
-      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 410
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8324,7 +8324,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 411
-      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8351,7 +8351,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 412
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8370,7 +8370,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 413
-      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8385,7 +8385,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 414
-      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8400,7 +8400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 415
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8419,7 +8419,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 416
-      helas_FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8434,7 +8434,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 417
-      helas_FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8449,7 +8449,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 418
-      helas_FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8464,7 +8464,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 419
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8483,7 +8483,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 420
-      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8498,7 +8498,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 421
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8517,7 +8517,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 422
-      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8536,7 +8536,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 423
-      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8555,7 +8555,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 424
-      helas_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8575,7 +8575,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8595,7 +8595,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8619,10 +8619,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 425 OF 1240 ***
 
       // Wavefunction(s) for diagram number 425
-      helas_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 425
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8649,7 +8649,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 426
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8676,7 +8676,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 427
-      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8703,7 +8703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 428
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8722,7 +8722,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 429
-      helas_FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8737,7 +8737,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 430
-      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8752,7 +8752,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 431
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8771,7 +8771,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 432
-      helas_FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8786,7 +8786,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 433
-      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8798,10 +8798,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 434 OF 1240 ***
 
       // Wavefunction(s) for diagram number 434
-      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 434
-      helas_VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8828,7 +8828,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 435
-      helas_VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8855,7 +8855,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 436
-      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8875,7 +8875,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8895,7 +8895,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8919,10 +8919,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 437 OF 1240 ***
 
       // Wavefunction(s) for diagram number 437
-      helas_VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] );
 
       // Amplitude(s) for diagram number 437
-      helas_VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8949,7 +8949,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 438
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8976,7 +8976,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 439
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -8996,7 +8996,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[115] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9016,7 +9016,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9043,7 +9043,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 440
-      helas_VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9070,7 +9070,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 441
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9097,7 +9097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 442
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9117,7 +9117,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9137,7 +9137,7 @@ namespace mg5amcCpu
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9161,12 +9161,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 443 OF 1240 ***
 
       // Wavefunction(s) for diagram number 443
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 443
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9186,7 +9186,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9206,7 +9206,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9230,12 +9230,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 444 OF 1240 ***
 
       // Wavefunction(s) for diagram number 444
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] );
 
       // Amplitude(s) for diagram number 444
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9255,7 +9255,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9275,7 +9275,7 @@ namespace mg5amcCpu
       jamp_sv[94] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9302,7 +9302,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 445
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9322,7 +9322,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9342,7 +9342,7 @@ namespace mg5amcCpu
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9369,7 +9369,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 446
-      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9389,7 +9389,7 @@ namespace mg5amcCpu
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9409,7 +9409,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9436,7 +9436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 447
-      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9463,7 +9463,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 448
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9490,7 +9490,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 449
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9517,7 +9517,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 450
-      helas_VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9536,7 +9536,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 451
-      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9551,7 +9551,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 452
-      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9564,7 +9564,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 453
-      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9577,7 +9577,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 454
-      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9592,7 +9592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 455
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9611,7 +9611,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 456
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9623,7 +9623,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9635,7 +9635,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9654,7 +9654,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 457
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9669,7 +9669,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 458
-      helas_FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9682,7 +9682,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 459
-      helas_FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9695,7 +9695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 460
-      helas_VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9714,7 +9714,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 461
-      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9729,7 +9729,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 462
-      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9742,7 +9742,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 463
-      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9755,7 +9755,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 464
-      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9770,7 +9770,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 465
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9789,7 +9789,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 466
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9801,7 +9801,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9813,7 +9813,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9832,7 +9832,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 467
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9847,7 +9847,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 468
-      helas_FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9860,7 +9860,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 469
-      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9873,7 +9873,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 470
-      helas_VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9892,7 +9892,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 471
-      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9907,7 +9907,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 472
-      helas_FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9920,7 +9920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 473
-      helas_FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9933,7 +9933,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 474
-      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9948,7 +9948,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 475
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9967,7 +9967,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 476
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9979,7 +9979,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -9991,7 +9991,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10010,7 +10010,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 477
-      helas_VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10029,7 +10029,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 478
-      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10044,7 +10044,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 479
-      helas_FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10057,7 +10057,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 480
-      helas_FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10070,7 +10070,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 481
-      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10085,7 +10085,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 482
-      helas_VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10104,7 +10104,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 483
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10116,7 +10116,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10128,7 +10128,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10147,7 +10147,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 484
-      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10166,7 +10166,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 485
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10185,7 +10185,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 486
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10204,7 +10204,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 487
-      helas_FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10219,7 +10219,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 488
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10238,7 +10238,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 489
-      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10253,7 +10253,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 490
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10265,7 +10265,7 @@ namespace mg5amcCpu
       jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10277,7 +10277,7 @@ namespace mg5amcCpu
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10296,7 +10296,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 491
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10308,7 +10308,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10320,7 +10320,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10339,7 +10339,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 492
-      helas_VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10359,7 +10359,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10379,7 +10379,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10403,11 +10403,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 493 OF 1240 ***
 
       // Wavefunction(s) for diagram number 493
-      helas_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 493
-      helas_FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10420,7 +10420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 494
-      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10430,10 +10430,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 495 OF 1240 ***
 
       // Wavefunction(s) for diagram number 495
-      helas_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] );
 
       // Amplitude(s) for diagram number 495
-      helas_VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10452,7 +10452,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 496
-      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10464,10 +10464,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 497 OF 1240 ***
 
       // Wavefunction(s) for diagram number 497
-      helas_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 497
-      helas_VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10486,7 +10486,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 498
-      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10498,12 +10498,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 499 OF 1240 ***
 
       // Wavefunction(s) for diagram number 499
-      helas_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] );
 
       // Amplitude(s) for diagram number 499
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10515,7 +10515,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10527,7 +10527,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10543,10 +10543,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 500 OF 1240 ***
 
       // Wavefunction(s) for diagram number 500
-      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
 
       // Amplitude(s) for diagram number 500
-      helas_FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10556,10 +10556,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 501 OF 1240 ***
 
       // Wavefunction(s) for diagram number 501
-      helas_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 501
-      helas_FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10572,7 +10572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 502
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10587,7 +10587,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 503
-      helas_FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10597,10 +10597,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 504 OF 1240 ***
 
       // Wavefunction(s) for diagram number 504
-      helas_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
 
       // Amplitude(s) for diagram number 504
-      helas_FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10613,7 +10613,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 505
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10628,7 +10628,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 506
-      helas_FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10643,7 +10643,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 507
-      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10655,10 +10655,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 508 OF 1240 ***
 
       // Wavefunction(s) for diagram number 508
-      helas_VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] );
 
       // Amplitude(s) for diagram number 508
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10674,10 +10674,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 509 OF 1240 ***
 
       // Wavefunction(s) for diagram number 509
-      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] );
 
       // Amplitude(s) for diagram number 509
-      helas_FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10690,7 +10690,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 510
-      helas_FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10703,7 +10703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 511
-      helas_VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10722,7 +10722,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 512
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10737,7 +10737,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 513
-      helas_VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10756,7 +10756,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 514
-      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10771,7 +10771,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 515
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10783,7 +10783,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10795,7 +10795,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10811,10 +10811,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 516 OF 1240 ***
 
       // Wavefunction(s) for diagram number 516
-      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] );
 
       // Amplitude(s) for diagram number 516
-      helas_FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10824,10 +10824,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 517 OF 1240 ***
 
       // Wavefunction(s) for diagram number 517
-      helas_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 517
-      helas_FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10840,7 +10840,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 518
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10855,7 +10855,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 519
-      helas_FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10865,10 +10865,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 520 OF 1240 ***
 
       // Wavefunction(s) for diagram number 520
-      helas_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 520
-      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10881,7 +10881,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 521
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10896,7 +10896,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 522
-      helas_FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10911,7 +10911,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 523
-      helas_FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10926,7 +10926,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 524
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10945,7 +10945,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 525
-      helas_FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10960,7 +10960,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 526
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10979,7 +10979,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 527
-      helas_FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -10994,7 +10994,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 528
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11013,7 +11013,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 529
-      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11032,7 +11032,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 530
-      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11051,7 +11051,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 531
-      helas_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11071,7 +11071,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11091,7 +11091,7 @@ namespace mg5amcCpu
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11115,10 +11115,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 532 OF 1240 ***
 
       // Wavefunction(s) for diagram number 532
-      helas_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 532
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11142,10 +11142,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 533 OF 1240 ***
 
       // Wavefunction(s) for diagram number 533
-      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 533
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11172,7 +11172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 534
-      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11199,7 +11199,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 535
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11218,7 +11218,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 536
-      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11233,7 +11233,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 537
-      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11248,7 +11248,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 538
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11267,7 +11267,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 539
-      helas_FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11282,7 +11282,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 540
-      helas_FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11297,7 +11297,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 541
-      helas_FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11312,7 +11312,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 542
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11331,7 +11331,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 543
-      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11346,7 +11346,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 544
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11365,7 +11365,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 545
-      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11384,7 +11384,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 546
-      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11403,7 +11403,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 547
-      helas_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11423,7 +11423,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11443,7 +11443,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11467,10 +11467,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 548 OF 1240 ***
 
       // Wavefunction(s) for diagram number 548
-      helas_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 548
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11497,7 +11497,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 549
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11524,7 +11524,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 550
-      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11551,7 +11551,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 551
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11570,7 +11570,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 552
-      helas_FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11585,7 +11585,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 553
-      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11600,7 +11600,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 554
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11619,7 +11619,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 555
-      helas_FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11634,7 +11634,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 556
-      helas_FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11646,10 +11646,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 557 OF 1240 ***
 
       // Wavefunction(s) for diagram number 557
-      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 557
-      helas_VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11676,7 +11676,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 558
-      helas_VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11703,7 +11703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 559
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11723,7 +11723,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11743,7 +11743,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11770,7 +11770,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 560
-      helas_VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11797,7 +11797,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 561
-      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11824,7 +11824,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 562
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11844,7 +11844,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11864,7 +11864,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11891,7 +11891,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 563
-      helas_VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11918,7 +11918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 564
-      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11945,7 +11945,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 565
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11965,7 +11965,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -11985,7 +11985,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12009,12 +12009,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 566 OF 1240 ***
 
       // Wavefunction(s) for diagram number 566
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 566
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12034,7 +12034,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12054,7 +12054,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12078,12 +12078,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 567 OF 1240 ***
 
       // Wavefunction(s) for diagram number 567
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] );
 
       // Amplitude(s) for diagram number 567
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12103,7 +12103,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12123,7 +12123,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12150,7 +12150,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 568
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12170,7 +12170,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12190,7 +12190,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12217,7 +12217,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 569
-      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12237,7 +12237,7 @@ namespace mg5amcCpu
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12257,7 +12257,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12284,7 +12284,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 570
-      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12311,7 +12311,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 571
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12338,7 +12338,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 572
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12365,7 +12365,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 573
-      helas_VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12384,7 +12384,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 574
-      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12399,7 +12399,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 575
-      helas_FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12412,7 +12412,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 576
-      helas_FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12425,7 +12425,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 577
-      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12440,7 +12440,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 578
-      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12459,7 +12459,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 579
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12471,7 +12471,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12483,7 +12483,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12502,7 +12502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 580
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12517,7 +12517,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 581
-      helas_FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12530,7 +12530,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 582
-      helas_FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12543,7 +12543,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 583
-      helas_VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12562,7 +12562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 584
-      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12577,7 +12577,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 585
-      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12590,7 +12590,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 586
-      helas_FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12603,7 +12603,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 587
-      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12618,7 +12618,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 588
-      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12637,7 +12637,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 589
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12649,7 +12649,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12661,7 +12661,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12680,7 +12680,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 590
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12695,7 +12695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 591
-      helas_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12708,7 +12708,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 592
-      helas_FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12721,7 +12721,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 593
-      helas_VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12740,7 +12740,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 594
-      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12755,7 +12755,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 595
-      helas_FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12768,7 +12768,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 596
-      helas_FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12781,7 +12781,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 597
-      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12796,7 +12796,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 598
-      helas_VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12815,7 +12815,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 599
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12827,7 +12827,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12839,7 +12839,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12858,7 +12858,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 600
-      helas_VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12877,7 +12877,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 601
-      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12892,7 +12892,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 602
-      helas_FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12905,7 +12905,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 603
-      helas_FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12918,7 +12918,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 604
-      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12933,7 +12933,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 605
-      helas_VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12952,7 +12952,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 606
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12964,7 +12964,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12976,7 +12976,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -12995,7 +12995,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 607
-      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13014,7 +13014,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 608
-      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13033,7 +13033,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 609
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13052,7 +13052,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 610
-      helas_FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13067,7 +13067,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 611
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13086,7 +13086,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 612
-      helas_FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13101,7 +13101,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 613
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13113,7 +13113,7 @@ namespace mg5amcCpu
       jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13125,7 +13125,7 @@ namespace mg5amcCpu
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13144,7 +13144,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 614
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13156,7 +13156,7 @@ namespace mg5amcCpu
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13168,7 +13168,7 @@ namespace mg5amcCpu
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13187,7 +13187,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 615
-      helas_VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13207,7 +13207,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13227,7 +13227,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13251,11 +13251,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 616 OF 1240 ***
 
       // Wavefunction(s) for diagram number 616
-      helas_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      helas_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 616
-      helas_FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13268,7 +13268,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 617
-      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13278,10 +13278,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 618 OF 1240 ***
 
       // Wavefunction(s) for diagram number 618
-      helas_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] );
 
       // Amplitude(s) for diagram number 618
-      helas_VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13300,7 +13300,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 619
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13312,10 +13312,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 620 OF 1240 ***
 
       // Wavefunction(s) for diagram number 620
-      helas_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 620
-      helas_VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13334,7 +13334,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 621
-      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13346,12 +13346,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 622 OF 1240 ***
 
       // Wavefunction(s) for diagram number 622
-      helas_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] );
 
       // Amplitude(s) for diagram number 622
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13363,7 +13363,7 @@ namespace mg5amcCpu
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13375,7 +13375,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13391,10 +13391,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 623 OF 1240 ***
 
       // Wavefunction(s) for diagram number 623
-      helas_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 623
-      helas_FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13404,10 +13404,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 624 OF 1240 ***
 
       // Wavefunction(s) for diagram number 624
-      helas_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] );
 
       // Amplitude(s) for diagram number 624
-      helas_FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13420,7 +13420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 625
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13435,7 +13435,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 626
-      helas_FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13445,10 +13445,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 627 OF 1240 ***
 
       // Wavefunction(s) for diagram number 627
-      helas_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 627
-      helas_FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13461,7 +13461,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 628
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13476,7 +13476,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 629
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13491,7 +13491,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 630
-      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13503,10 +13503,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 631 OF 1240 ***
 
       // Wavefunction(s) for diagram number 631
-      helas_VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] );
 
       // Amplitude(s) for diagram number 631
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13522,10 +13522,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 632 OF 1240 ***
 
       // Wavefunction(s) for diagram number 632
-      helas_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] );
 
       // Amplitude(s) for diagram number 632
-      helas_FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13538,7 +13538,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 633
-      helas_FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13551,7 +13551,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 634
-      helas_VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13570,7 +13570,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 635
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13585,7 +13585,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 636
-      helas_VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13604,7 +13604,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 637
-      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13619,7 +13619,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 638
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13631,7 +13631,7 @@ namespace mg5amcCpu
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13643,7 +13643,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13659,10 +13659,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 639 OF 1240 ***
 
       // Wavefunction(s) for diagram number 639
-      helas_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 639
-      helas_FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13672,10 +13672,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 640 OF 1240 ***
 
       // Wavefunction(s) for diagram number 640
-      helas_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 640
-      helas_FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13688,7 +13688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 641
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13703,7 +13703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 642
-      helas_FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13713,10 +13713,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 643 OF 1240 ***
 
       // Wavefunction(s) for diagram number 643
-      helas_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] );
 
       // Amplitude(s) for diagram number 643
-      helas_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13729,7 +13729,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 644
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13744,7 +13744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 645
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13759,7 +13759,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 646
-      helas_FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13774,7 +13774,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 647
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13793,7 +13793,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 648
-      helas_FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13808,7 +13808,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 649
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13827,7 +13827,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 650
-      helas_FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13842,7 +13842,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 651
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13861,7 +13861,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 652
-      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13880,7 +13880,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 653
-      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13899,7 +13899,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 654
-      helas_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13919,7 +13919,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13939,7 +13939,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13963,10 +13963,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 655 OF 1240 ***
 
       // Wavefunction(s) for diagram number 655
-      helas_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 655
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -13990,10 +13990,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 656 OF 1240 ***
 
       // Wavefunction(s) for diagram number 656
-      helas_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] );
 
       // Amplitude(s) for diagram number 656
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14020,7 +14020,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 657
-      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14047,7 +14047,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 658
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14066,7 +14066,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 659
-      helas_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14081,7 +14081,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 660
-      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14096,7 +14096,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 661
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14115,7 +14115,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 662
-      helas_FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14130,7 +14130,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 663
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14145,7 +14145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 664
-      helas_FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14160,7 +14160,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 665
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14179,7 +14179,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 666
-      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14194,7 +14194,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 667
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14213,7 +14213,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 668
-      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14232,7 +14232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 669
-      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14251,7 +14251,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 670
-      helas_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14271,7 +14271,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14291,7 +14291,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14315,10 +14315,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 671 OF 1240 ***
 
       // Wavefunction(s) for diagram number 671
-      helas_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 671
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14345,7 +14345,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 672
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14372,7 +14372,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 673
-      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14399,7 +14399,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 674
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14418,7 +14418,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 675
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14433,7 +14433,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 676
-      helas_FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14448,7 +14448,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 677
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14467,7 +14467,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 678
-      helas_FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14482,7 +14482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 679
-      helas_FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14494,10 +14494,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 680 OF 1240 ***
 
       // Wavefunction(s) for diagram number 680
-      helas_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] );
 
       // Amplitude(s) for diagram number 680
-      helas_VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14524,7 +14524,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 681
-      helas_VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14551,7 +14551,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 682
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14571,7 +14571,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14591,7 +14591,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14618,7 +14618,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 683
-      helas_VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14645,7 +14645,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 684
-      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14672,7 +14672,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 685
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14692,7 +14692,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14712,7 +14712,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14739,7 +14739,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 686
-      helas_VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14766,7 +14766,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 687
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14793,7 +14793,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 688
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14813,7 +14813,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14833,7 +14833,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14857,12 +14857,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 689 OF 1240 ***
 
       // Wavefunction(s) for diagram number 689
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 689
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14882,7 +14882,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14902,7 +14902,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14926,12 +14926,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 690 OF 1240 ***
 
       // Wavefunction(s) for diagram number 690
-      helas_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
-      helas_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      helas_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_CD_VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_CD_VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 690
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14951,7 +14951,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14971,7 +14971,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -14998,7 +14998,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 691
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15018,7 +15018,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15038,7 +15038,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15065,7 +15065,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 692
-      helas_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15085,7 +15085,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[96] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      helas_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15105,7 +15105,7 @@ namespace mg5amcCpu
       jamp_sv[97] += amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15132,7 +15132,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 693
-      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15159,7 +15159,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 694
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15186,7 +15186,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 695
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15213,7 +15213,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 696
-      helas_VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15232,7 +15232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 697
-      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15247,7 +15247,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 698
-      helas_FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15260,7 +15260,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 699
-      helas_FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15273,7 +15273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 700
-      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15288,7 +15288,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 701
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15307,7 +15307,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 702
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15319,7 +15319,7 @@ namespace mg5amcCpu
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15331,7 +15331,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15350,7 +15350,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 703
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15365,7 +15365,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 704
-      helas_FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15378,7 +15378,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 705
-      helas_FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15391,7 +15391,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 706
-      helas_VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15410,7 +15410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 707
-      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15425,7 +15425,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 708
-      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15438,7 +15438,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 709
-      helas_FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15451,7 +15451,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 710
-      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15466,7 +15466,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 711
-      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15485,7 +15485,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 712
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15497,7 +15497,7 @@ namespace mg5amcCpu
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15509,7 +15509,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15528,7 +15528,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 713
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15543,7 +15543,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 714
-      helas_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15556,7 +15556,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 715
-      helas_FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15569,7 +15569,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 716
-      helas_VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15588,7 +15588,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 717
-      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15603,7 +15603,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 718
-      helas_FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15616,7 +15616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 719
-      helas_FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15629,7 +15629,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 720
-      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15644,7 +15644,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 721
-      helas_VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15663,7 +15663,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 722
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15675,7 +15675,7 @@ namespace mg5amcCpu
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15687,7 +15687,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15706,7 +15706,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 723
-      helas_VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15725,7 +15725,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 724
-      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15740,7 +15740,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 725
-      helas_FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15753,7 +15753,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 726
-      helas_FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15766,7 +15766,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 727
-      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15781,7 +15781,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 728
-      helas_VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15800,7 +15800,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 729
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15812,7 +15812,7 @@ namespace mg5amcCpu
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15824,7 +15824,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15843,7 +15843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 730
-      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15862,7 +15862,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 731
-      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15881,7 +15881,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 732
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15900,7 +15900,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 733
-      helas_FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15915,7 +15915,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 734
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15934,7 +15934,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 735
-      helas_FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15949,7 +15949,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 736
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15961,7 +15961,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15973,7 +15973,7 @@ namespace mg5amcCpu
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -15992,7 +15992,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 737
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16004,7 +16004,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16016,7 +16016,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16035,7 +16035,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 738
-      helas_VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16055,7 +16055,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16075,7 +16075,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16099,10 +16099,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 739 OF 1240 ***
 
       // Wavefunction(s) for diagram number 739
-      helas_FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] );
+      helas_CD_FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] );
 
       // Amplitude(s) for diagram number 739
-      helas_FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16114,7 +16114,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 740
-      helas_FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16123,10 +16123,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 741 OF 1240 ***
 
       // Wavefunction(s) for diagram number 741
-      helas_FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
+      helas_CD_FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] );
 
       // Amplitude(s) for diagram number 741
-      helas_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16138,7 +16138,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 742
-      helas_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16150,7 +16150,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 743
-      helas_FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16162,7 +16162,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 744
-      helas_FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16174,7 +16174,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 745
-      helas_FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16187,7 +16187,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 746
-      helas_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16197,10 +16197,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 747 OF 1240 ***
 
       // Wavefunction(s) for diagram number 747
-      helas_VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] );
 
       // Amplitude(s) for diagram number 747
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16215,7 +16215,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 748
-      helas_FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16227,7 +16227,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 749
-      helas_FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16236,10 +16236,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 750 OF 1240 ***
 
       // Wavefunction(s) for diagram number 750
-      helas_FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
+      helas_CD_FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] );
 
       // Amplitude(s) for diagram number 750
-      helas_FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16251,7 +16251,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 751
-      helas_FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16263,7 +16263,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 752
-      helas_FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16275,7 +16275,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 753
-      helas_FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16287,7 +16287,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 754
-      helas_FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16300,7 +16300,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 755
-      helas_FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16310,10 +16310,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 756 OF 1240 ***
 
       // Wavefunction(s) for diagram number 756
-      helas_VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] );
 
       // Amplitude(s) for diagram number 756
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16328,7 +16328,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 757
-      helas_FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16340,7 +16340,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 758
-      helas_FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16349,10 +16349,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 759 OF 1240 ***
 
       // Wavefunction(s) for diagram number 759
-      helas_FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
+      helas_CD_FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] );
 
       // Amplitude(s) for diagram number 759
-      helas_FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16364,7 +16364,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 760
-      helas_FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16376,7 +16376,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 761
-      helas_FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16388,7 +16388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 762
-      helas_FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16400,7 +16400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 763
-      helas_FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16413,7 +16413,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 764
-      helas_FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16423,10 +16423,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 765 OF 1240 ***
 
       // Wavefunction(s) for diagram number 765
-      helas_VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] );
 
       // Amplitude(s) for diagram number 765
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16441,7 +16441,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 766
-      helas_FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16454,7 +16454,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 767
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16469,7 +16469,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 768
-      helas_VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16488,7 +16488,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 769
-      helas_FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16503,7 +16503,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 770
-      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16522,7 +16522,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 771
-      helas_FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16532,12 +16532,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 772 OF 1240 ***
 
       // Wavefunction(s) for diagram number 772
-      helas_VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 772
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16549,7 +16549,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16561,7 +16561,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16580,7 +16580,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 773
-      helas_FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16593,7 +16593,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 774
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16608,7 +16608,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 775
-      helas_VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16627,7 +16627,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 776
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16642,7 +16642,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 777
-      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16661,7 +16661,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 778
-      helas_FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16671,12 +16671,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 779 OF 1240 ***
 
       // Wavefunction(s) for diagram number 779
-      helas_VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] );
 
       // Amplitude(s) for diagram number 779
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16688,7 +16688,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16700,7 +16700,7 @@ namespace mg5amcCpu
       jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16719,7 +16719,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 780
-      helas_FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16732,7 +16732,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 781
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16747,7 +16747,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 782
-      helas_VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16766,7 +16766,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 783
-      helas_FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16781,7 +16781,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 784
-      helas_VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16800,7 +16800,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 785
-      helas_FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16810,12 +16810,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 786 OF 1240 ***
 
       // Wavefunction(s) for diagram number 786
-      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] );
 
       // Amplitude(s) for diagram number 786
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16827,7 +16827,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16839,7 +16839,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16858,7 +16858,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 787
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16866,7 +16866,7 @@ namespace mg5amcCpu
       jamp_sv[25] -= amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[29] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16874,7 +16874,7 @@ namespace mg5amcCpu
       jamp_sv[26] += amp_sv[0];
       jamp_sv[27] -= amp_sv[0];
       jamp_sv[28] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16886,12 +16886,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 788 OF 1240 ***
 
       // Wavefunction(s) for diagram number 788
-      helas_VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] );
-      helas_VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] );
-      helas_VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] );
 
       // Amplitude(s) for diagram number 788
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16903,7 +16903,7 @@ namespace mg5amcCpu
       jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16915,7 +16915,7 @@ namespace mg5amcCpu
       jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16931,10 +16931,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 789 OF 1240 ***
 
       // Wavefunction(s) for diagram number 789
-      helas_FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
+      helas_CD_FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] );
 
       // Amplitude(s) for diagram number 789
-      helas_FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16946,7 +16946,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 790
-      helas_FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16955,10 +16955,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 791 OF 1240 ***
 
       // Wavefunction(s) for diagram number 791
-      helas_FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
+      helas_CD_FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] );
 
       // Amplitude(s) for diagram number 791
-      helas_FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16970,7 +16970,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 792
-      helas_FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16982,7 +16982,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 793
-      helas_FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -16994,7 +16994,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 794
-      helas_FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17006,7 +17006,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 795
-      helas_FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17019,7 +17019,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 796
-      helas_FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17032,7 +17032,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 797
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17047,7 +17047,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 798
-      helas_FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17059,7 +17059,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 799
-      helas_FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17068,10 +17068,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 800 OF 1240 ***
 
       // Wavefunction(s) for diagram number 800
-      helas_FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
+      helas_CD_FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] );
 
       // Amplitude(s) for diagram number 800
-      helas_FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17083,7 +17083,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 801
-      helas_FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17095,7 +17095,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 802
-      helas_FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17107,7 +17107,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 803
-      helas_FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17119,7 +17119,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 804
-      helas_FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17132,7 +17132,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 805
-      helas_FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17145,7 +17145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 806
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17160,7 +17160,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 807
-      helas_FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17172,7 +17172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 808
-      helas_FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17181,10 +17181,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 809 OF 1240 ***
 
       // Wavefunction(s) for diagram number 809
-      helas_FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
+      helas_CD_FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] );
 
       // Amplitude(s) for diagram number 809
-      helas_FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17196,7 +17196,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 810
-      helas_FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17208,7 +17208,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 811
-      helas_FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17220,7 +17220,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 812
-      helas_FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17232,7 +17232,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 813
-      helas_FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17245,7 +17245,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 814
-      helas_FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17258,7 +17258,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 815
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17273,7 +17273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 816
-      helas_FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17286,7 +17286,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 817
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17301,7 +17301,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 818
-      helas_VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17320,7 +17320,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 819
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17335,7 +17335,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 820
-      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17354,7 +17354,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 821
-      helas_FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17367,7 +17367,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 822
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17379,7 +17379,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17391,7 +17391,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17410,7 +17410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 823
-      helas_FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17423,7 +17423,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 824
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17438,7 +17438,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 825
-      helas_VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17457,7 +17457,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 826
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17472,7 +17472,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 827
-      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17491,7 +17491,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 828
-      helas_FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17504,7 +17504,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 829
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17516,7 +17516,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17528,7 +17528,7 @@ namespace mg5amcCpu
       jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17547,7 +17547,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 830
-      helas_FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17560,7 +17560,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 831
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17575,7 +17575,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 832
-      helas_VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17594,7 +17594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 833
-      helas_FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17609,7 +17609,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 834
-      helas_VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17628,7 +17628,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 835
-      helas_FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17641,7 +17641,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 836
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17653,7 +17653,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17665,7 +17665,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17684,7 +17684,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 837
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17692,7 +17692,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17700,7 +17700,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17715,7 +17715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 838
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17727,7 +17727,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17739,7 +17739,7 @@ namespace mg5amcCpu
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17755,10 +17755,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 839 OF 1240 ***
 
       // Wavefunction(s) for diagram number 839
-      helas_VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] );
 
       // Amplitude(s) for diagram number 839
-      helas_VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17785,7 +17785,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 840
-      helas_VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17812,7 +17812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 841
-      helas_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17832,7 +17832,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17852,7 +17852,7 @@ namespace mg5amcCpu
       jamp_sv[115] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17876,10 +17876,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 842 OF 1240 ***
 
       // Wavefunction(s) for diagram number 842
-      helas_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] );
 
       // Amplitude(s) for diagram number 842
-      helas_VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17906,7 +17906,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 843
-      helas_VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17933,7 +17933,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 844
-      helas_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17953,7 +17953,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -17973,7 +17973,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18000,7 +18000,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 845
-      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18027,7 +18027,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 846
-      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18051,12 +18051,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 847 OF 1240 ***
 
       // Wavefunction(s) for diagram number 847
-      helas_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 847
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18076,7 +18076,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18096,7 +18096,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18120,12 +18120,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 848 OF 1240 ***
 
       // Wavefunction(s) for diagram number 848
-      helas_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] );
 
       // Amplitude(s) for diagram number 848
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18145,7 +18145,7 @@ namespace mg5amcCpu
       jamp_sv[95] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18165,7 +18165,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[98] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18189,12 +18189,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 849 OF 1240 ***
 
       // Wavefunction(s) for diagram number 849
-      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] );
 
       // Amplitude(s) for diagram number 849
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18214,7 +18214,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18234,7 +18234,7 @@ namespace mg5amcCpu
       jamp_sv[105] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18258,12 +18258,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 850 OF 1240 ***
 
       // Wavefunction(s) for diagram number 850
-      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] );
 
       // Amplitude(s) for diagram number 850
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18283,7 +18283,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18303,7 +18303,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18330,7 +18330,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 851
-      helas_VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18350,7 +18350,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18370,7 +18370,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18397,7 +18397,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 852
-      helas_VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18424,7 +18424,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 853
-      helas_VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18451,7 +18451,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 854
-      helas_VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18478,7 +18478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 855
-      helas_VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18497,7 +18497,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 856
-      helas_FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18512,7 +18512,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 857
-      helas_FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18525,7 +18525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 858
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18540,7 +18540,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 859
-      helas_FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18553,7 +18553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 860
-      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18572,7 +18572,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 861
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18584,7 +18584,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18596,7 +18596,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18615,7 +18615,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 862
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18630,7 +18630,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 863
-      helas_FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18643,7 +18643,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 864
-      helas_FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18656,7 +18656,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 865
-      helas_VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18675,7 +18675,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 866
-      helas_FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18690,7 +18690,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 867
-      helas_FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18703,7 +18703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 868
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18718,7 +18718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 869
-      helas_FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18731,7 +18731,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 870
-      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18750,7 +18750,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 871
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18762,7 +18762,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18774,7 +18774,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18793,7 +18793,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 872
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18808,7 +18808,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 873
-      helas_FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18821,7 +18821,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 874
-      helas_FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18834,7 +18834,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 875
-      helas_VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18853,7 +18853,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 876
-      helas_FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18868,7 +18868,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 877
-      helas_FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18881,7 +18881,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 878
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18896,7 +18896,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 879
-      helas_FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18909,7 +18909,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 880
-      helas_VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18928,7 +18928,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 881
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18940,7 +18940,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18952,7 +18952,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18971,7 +18971,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 882
-      helas_VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -18990,7 +18990,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 883
-      helas_FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19005,7 +19005,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 884
-      helas_FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19018,7 +19018,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 885
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19033,7 +19033,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 886
-      helas_FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19046,7 +19046,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 887
-      helas_VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19065,7 +19065,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 888
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19077,7 +19077,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19089,7 +19089,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19108,7 +19108,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 889
-      helas_FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19127,7 +19127,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 890
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19146,7 +19146,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 891
-      helas_FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19165,7 +19165,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 892
-      helas_FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19184,7 +19184,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 893
-      helas_FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19199,7 +19199,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 894
-      helas_FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19211,10 +19211,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 895 OF 1240 ***
 
       // Wavefunction(s) for diagram number 895
-      helas_VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] );
 
       // Amplitude(s) for diagram number 895
-      helas_VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19241,7 +19241,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 896
-      helas_VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19268,7 +19268,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 897
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19288,7 +19288,7 @@ namespace mg5amcCpu
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19308,7 +19308,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19335,7 +19335,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 898
-      helas_VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19362,7 +19362,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 899
-      helas_VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19389,7 +19389,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 900
-      helas_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19409,7 +19409,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19429,7 +19429,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[107] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19456,7 +19456,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 901
-      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19483,7 +19483,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 902
-      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19507,12 +19507,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 903 OF 1240 ***
 
       // Wavefunction(s) for diagram number 903
-      helas_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 903
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19532,7 +19532,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19552,7 +19552,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19576,12 +19576,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 904 OF 1240 ***
 
       // Wavefunction(s) for diagram number 904
-      helas_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] );
 
       // Amplitude(s) for diagram number 904
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19601,7 +19601,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19621,7 +19621,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[97] += amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19645,12 +19645,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 905 OF 1240 ***
 
       // Wavefunction(s) for diagram number 905
-      helas_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] );
 
       // Amplitude(s) for diagram number 905
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19670,7 +19670,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19690,7 +19690,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19717,7 +19717,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 906
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19737,7 +19737,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19757,7 +19757,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
-      helas_VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19784,7 +19784,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 907
-      helas_VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19804,7 +19804,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19824,7 +19824,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19851,7 +19851,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 908
-      helas_VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19878,7 +19878,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 909
-      helas_VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19905,7 +19905,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 910
-      helas_VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19932,7 +19932,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 911
-      helas_VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19951,7 +19951,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 912
-      helas_FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19966,7 +19966,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 913
-      helas_FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19979,7 +19979,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 914
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -19994,7 +19994,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 915
-      helas_FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20007,7 +20007,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 916
-      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20026,7 +20026,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 917
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20038,7 +20038,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20050,7 +20050,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20069,7 +20069,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 918
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20084,7 +20084,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 919
-      helas_FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20097,7 +20097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 920
-      helas_FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20110,7 +20110,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 921
-      helas_VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20129,7 +20129,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 922
-      helas_FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20144,7 +20144,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 923
-      helas_FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20157,7 +20157,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 924
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20172,7 +20172,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 925
-      helas_FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20185,7 +20185,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 926
-      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20204,7 +20204,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 927
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20216,7 +20216,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20228,7 +20228,7 @@ namespace mg5amcCpu
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20247,7 +20247,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 928
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20262,7 +20262,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 929
-      helas_FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20275,7 +20275,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 930
-      helas_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20288,7 +20288,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 931
-      helas_VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20307,7 +20307,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 932
-      helas_FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20322,7 +20322,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 933
-      helas_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20335,7 +20335,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 934
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20350,7 +20350,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 935
-      helas_FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20363,7 +20363,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 936
-      helas_VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20382,7 +20382,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 937
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20394,7 +20394,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20406,7 +20406,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20425,7 +20425,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 938
-      helas_VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20444,7 +20444,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 939
-      helas_FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20459,7 +20459,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 940
-      helas_FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20472,7 +20472,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 941
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20487,7 +20487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 942
-      helas_FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20500,7 +20500,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 943
-      helas_VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20519,7 +20519,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 944
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20531,7 +20531,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20543,7 +20543,7 @@ namespace mg5amcCpu
       jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20562,7 +20562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 945
-      helas_FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20581,7 +20581,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 946
-      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20600,7 +20600,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 947
-      helas_FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20619,7 +20619,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 948
-      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20638,7 +20638,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 949
-      helas_FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20653,7 +20653,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 950
-      helas_FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20665,10 +20665,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 951 OF 1240 ***
 
       // Wavefunction(s) for diagram number 951
-      helas_VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 951
-      helas_VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20695,7 +20695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 952
-      helas_VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20722,7 +20722,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 953
-      helas_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20742,7 +20742,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20762,7 +20762,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20789,7 +20789,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 954
-      helas_VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20816,7 +20816,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 955
-      helas_VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20843,7 +20843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 956
-      helas_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20863,7 +20863,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      helas_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20883,7 +20883,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      helas_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20910,7 +20910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 957
-      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20937,7 +20937,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 958
-      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20961,12 +20961,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 959 OF 1240 ***
 
       // Wavefunction(s) for diagram number 959
-      helas_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 959
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -20986,7 +20986,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21006,7 +21006,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21030,12 +21030,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 960 OF 1240 ***
 
       // Wavefunction(s) for diagram number 960
-      helas_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] );
 
       // Amplitude(s) for diagram number 960
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21055,7 +21055,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21075,7 +21075,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21102,7 +21102,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 961
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21122,7 +21122,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21142,7 +21142,7 @@ namespace mg5amcCpu
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      helas_VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21169,7 +21169,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 962
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21189,7 +21189,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21209,7 +21209,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
-      helas_VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21236,7 +21236,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 963
-      helas_VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21256,7 +21256,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21276,7 +21276,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21303,7 +21303,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 964
-      helas_VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21330,7 +21330,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 965
-      helas_VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21357,7 +21357,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 966
-      helas_VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21384,7 +21384,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 967
-      helas_VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21403,7 +21403,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 968
-      helas_FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21418,7 +21418,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 969
-      helas_FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21431,7 +21431,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 970
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21446,7 +21446,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 971
-      helas_FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21459,7 +21459,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 972
-      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21478,7 +21478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 973
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21490,7 +21490,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21502,7 +21502,7 @@ namespace mg5amcCpu
       jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21521,7 +21521,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 974
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21536,7 +21536,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 975
-      helas_FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21549,7 +21549,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 976
-      helas_FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21562,7 +21562,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 977
-      helas_VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21581,7 +21581,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 978
-      helas_FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21596,7 +21596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 979
-      helas_FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21609,7 +21609,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 980
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21624,7 +21624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 981
-      helas_FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21637,7 +21637,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 982
-      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21656,7 +21656,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 983
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21668,7 +21668,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21680,7 +21680,7 @@ namespace mg5amcCpu
       jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21699,7 +21699,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 984
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21714,7 +21714,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 985
-      helas_FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21727,7 +21727,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 986
-      helas_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21740,7 +21740,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 987
-      helas_VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21759,7 +21759,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 988
-      helas_FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21774,7 +21774,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 989
-      helas_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21787,7 +21787,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 990
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21802,7 +21802,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 991
-      helas_FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21815,7 +21815,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 992
-      helas_VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21834,7 +21834,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 993
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21846,7 +21846,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21858,7 +21858,7 @@ namespace mg5amcCpu
       jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21877,7 +21877,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 994
-      helas_VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21896,7 +21896,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 995
-      helas_FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21911,7 +21911,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 996
-      helas_FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21924,7 +21924,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 997
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21939,7 +21939,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 998
-      helas_FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21952,7 +21952,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 999
-      helas_VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21971,7 +21971,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1000
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21983,7 +21983,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -21995,7 +21995,7 @@ namespace mg5amcCpu
       jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22014,7 +22014,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1001
-      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22033,7 +22033,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1002
-      helas_FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22052,7 +22052,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1003
-      helas_FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22071,7 +22071,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1004
-      helas_FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22090,7 +22090,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1005
-      helas_FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22105,7 +22105,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1006
-      helas_FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22120,7 +22120,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1007
-      helas_VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22147,7 +22147,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1008
-      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22174,7 +22174,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1009
-      helas_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22194,7 +22194,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22214,7 +22214,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22241,7 +22241,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1010
-      helas_VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22268,7 +22268,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1011
-      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22295,7 +22295,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1012
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22315,7 +22315,7 @@ namespace mg5amcCpu
       jamp_sv[101] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22335,7 +22335,7 @@ namespace mg5amcCpu
       jamp_sv[103] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22362,7 +22362,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1013
-      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22389,7 +22389,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1014
-      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22413,12 +22413,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1015 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1015
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] );
 
       // Amplitude(s) for diagram number 1015
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22438,7 +22438,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22458,7 +22458,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22482,12 +22482,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1016 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1016
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 1016
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22507,7 +22507,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22527,7 +22527,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22554,7 +22554,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1017
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22574,7 +22574,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22594,7 +22594,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22621,7 +22621,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1018
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22641,7 +22641,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22661,7 +22661,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22688,7 +22688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1019
-      helas_VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22715,7 +22715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1020
-      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22742,7 +22742,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1021
-      helas_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22762,7 +22762,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22782,7 +22782,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22809,7 +22809,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1022
-      helas_VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22836,7 +22836,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1023
-      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22863,7 +22863,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1024
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22883,7 +22883,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22903,7 +22903,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22930,7 +22930,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1025
-      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22957,7 +22957,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1026
-      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -22984,7 +22984,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1027
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23004,7 +23004,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23024,7 +23024,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23048,12 +23048,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1028 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1028
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1028
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23073,7 +23073,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23093,7 +23093,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23120,7 +23120,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1029
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23140,7 +23140,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23160,7 +23160,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23187,7 +23187,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1030
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23207,7 +23207,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23227,7 +23227,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23254,7 +23254,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1031
-      helas_VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23281,7 +23281,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1032
-      helas_VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23308,7 +23308,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1033
-      helas_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23328,7 +23328,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23348,7 +23348,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23375,7 +23375,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1034
-      helas_VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23402,7 +23402,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1035
-      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23429,7 +23429,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1036
-      helas_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23449,7 +23449,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23469,7 +23469,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23496,7 +23496,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1037
-      helas_VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23523,7 +23523,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1038
-      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23550,7 +23550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1039
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23570,7 +23570,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23590,7 +23590,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23614,12 +23614,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1040 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1040
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 1040
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23639,7 +23639,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23659,7 +23659,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[90] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23686,7 +23686,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1041
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23706,7 +23706,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23726,7 +23726,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23753,7 +23753,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1042
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23773,7 +23773,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23793,7 +23793,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23820,7 +23820,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1043
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23840,7 +23840,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23860,7 +23860,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23880,7 +23880,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23900,7 +23900,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23920,7 +23920,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23940,7 +23940,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23960,7 +23960,7 @@ namespace mg5amcCpu
       jamp_sv[113] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -23980,7 +23980,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24007,7 +24007,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1044
-      helas_VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24027,7 +24027,7 @@ namespace mg5amcCpu
       jamp_sv[71] -= amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24047,7 +24047,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24074,7 +24074,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1045
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24094,7 +24094,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24114,7 +24114,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24141,7 +24141,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1046
-      helas_FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24153,7 +24153,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1047
-      helas_FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24165,7 +24165,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1048
-      helas_FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24177,7 +24177,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1049
-      helas_FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24189,7 +24189,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1050
-      helas_FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24201,7 +24201,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1051
-      helas_FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24213,7 +24213,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1052
-      helas_FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24225,7 +24225,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1053
-      helas_FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24237,7 +24237,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1054
-      helas_FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24249,7 +24249,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1055
-      helas_FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24261,7 +24261,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1056
-      helas_FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24273,7 +24273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1057
-      helas_FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24285,7 +24285,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1058
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24300,7 +24300,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1059
-      helas_FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24313,7 +24313,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1060
-      helas_FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24328,7 +24328,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1061
-      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24347,7 +24347,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1062
-      helas_FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24360,7 +24360,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1063
-      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24379,7 +24379,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1064
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24391,7 +24391,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24403,7 +24403,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24422,7 +24422,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1065
-      helas_FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24434,7 +24434,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1066
-      helas_FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24446,7 +24446,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1067
-      helas_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24458,7 +24458,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1068
-      helas_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24470,7 +24470,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1069
-      helas_FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24482,7 +24482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1070
-      helas_FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24494,7 +24494,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1071
-      helas_FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24506,7 +24506,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1072
-      helas_FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24518,7 +24518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1073
-      helas_FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24530,7 +24530,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1074
-      helas_FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24542,7 +24542,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1075
-      helas_FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24554,7 +24554,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1076
-      helas_FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24566,7 +24566,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1077
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24581,7 +24581,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1078
-      helas_FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24594,7 +24594,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1079
-      helas_FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24609,7 +24609,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1080
-      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24628,7 +24628,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1081
-      helas_FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24641,7 +24641,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1082
-      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24660,7 +24660,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1083
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24672,7 +24672,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24684,7 +24684,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24703,7 +24703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1084
-      helas_FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24715,7 +24715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1085
-      helas_FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24727,7 +24727,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1086
-      helas_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24739,7 +24739,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1087
-      helas_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24751,7 +24751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1088
-      helas_FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24763,7 +24763,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1089
-      helas_FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24775,7 +24775,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1090
-      helas_FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24787,7 +24787,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1091
-      helas_FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24799,7 +24799,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1092
-      helas_FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24811,7 +24811,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1093
-      helas_FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24823,7 +24823,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1094
-      helas_FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24835,7 +24835,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1095
-      helas_FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24847,7 +24847,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1096
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24862,7 +24862,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1097
-      helas_FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24875,7 +24875,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1098
-      helas_FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24890,7 +24890,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1099
-      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24909,7 +24909,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1100
-      helas_FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24922,7 +24922,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1101
-      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24941,7 +24941,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1102
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24953,7 +24953,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24965,7 +24965,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24984,7 +24984,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1103
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -24999,7 +24999,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1104
-      helas_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25012,7 +25012,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1105
-      helas_FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25027,7 +25027,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1106
-      helas_VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25046,7 +25046,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1107
-      helas_FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25059,7 +25059,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1108
-      helas_VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25078,7 +25078,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1109
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25090,7 +25090,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25102,7 +25102,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25121,7 +25121,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1110
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25136,7 +25136,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1111
-      helas_FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25149,7 +25149,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1112
-      helas_FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25164,7 +25164,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1113
-      helas_VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25183,7 +25183,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1114
-      helas_FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25196,7 +25196,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1115
-      helas_VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25215,7 +25215,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1116
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25227,7 +25227,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25239,7 +25239,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25258,7 +25258,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1117
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25273,7 +25273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1118
-      helas_FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25286,7 +25286,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1119
-      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25301,7 +25301,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1120
-      helas_VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25320,7 +25320,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1121
-      helas_FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25333,7 +25333,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1122
-      helas_VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25352,7 +25352,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1123
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25364,7 +25364,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25376,7 +25376,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25392,12 +25392,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1124 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1124
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] );
 
       // Amplitude(s) for diagram number 1124
-      helas_VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25417,7 +25417,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25437,7 +25437,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25457,7 +25457,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25477,7 +25477,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25497,7 +25497,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25517,7 +25517,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25537,7 +25537,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25557,7 +25557,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[118] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25581,12 +25581,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1125 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1125
-      helas_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] );
-      helas_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
-      helas_VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_CD_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_CD_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_CD_VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] );
 
       // Amplitude(s) for diagram number 1125
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25606,7 +25606,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25626,7 +25626,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25650,12 +25650,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1126 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1126
-      helas_VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] );
-      helas_VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] );
-      helas_VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_CD_VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1126
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25675,7 +25675,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25695,7 +25695,7 @@ namespace mg5amcCpu
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25722,7 +25722,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1127
-      helas_VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25742,7 +25742,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25762,7 +25762,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25786,12 +25786,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1128 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1128
-      helas_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
-      helas_FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
 
       // Amplitude(s) for diagram number 1128
-      helas_FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25799,7 +25799,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
-      helas_FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25807,7 +25807,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[93] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
-      helas_FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25822,7 +25822,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1129
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25834,7 +25834,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25846,7 +25846,7 @@ namespace mg5amcCpu
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25865,7 +25865,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1130
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25873,7 +25873,7 @@ namespace mg5amcCpu
       jamp_sv[74] -= amp_sv[0];
       jamp_sv[80] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25881,7 +25881,7 @@ namespace mg5amcCpu
       jamp_sv[78] += amp_sv[0];
       jamp_sv[80] -= amp_sv[0];
       jamp_sv[84] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25896,7 +25896,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1131
-      helas_FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25904,7 +25904,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25912,7 +25912,7 @@ namespace mg5amcCpu
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25927,7 +25927,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1132
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25939,7 +25939,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25951,7 +25951,7 @@ namespace mg5amcCpu
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25970,7 +25970,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1133
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25978,7 +25978,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25986,7 +25986,7 @@ namespace mg5amcCpu
       jamp_sv[102] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -25998,12 +25998,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1134 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1134
-      helas_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
-      helas_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
-      helas_FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
 
       // Amplitude(s) for diagram number 1134
-      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26011,7 +26011,7 @@ namespace mg5amcCpu
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[55] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26019,7 +26019,7 @@ namespace mg5amcCpu
       jamp_sv[25] += amp_sv[0];
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[49] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26034,7 +26034,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1135
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26046,7 +26046,7 @@ namespace mg5amcCpu
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26058,7 +26058,7 @@ namespace mg5amcCpu
       jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26077,7 +26077,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1136
-      helas_FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26085,7 +26085,7 @@ namespace mg5amcCpu
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
       jamp_sv[54] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26093,7 +26093,7 @@ namespace mg5amcCpu
       jamp_sv[24] += amp_sv[0];
       jamp_sv[30] -= amp_sv[0];
       jamp_sv[48] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26108,7 +26108,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1137
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26120,7 +26120,7 @@ namespace mg5amcCpu
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26132,7 +26132,7 @@ namespace mg5amcCpu
       jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26151,7 +26151,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1138
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26163,7 +26163,7 @@ namespace mg5amcCpu
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[55] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26175,7 +26175,7 @@ namespace mg5amcCpu
       jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26194,7 +26194,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1139
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26206,7 +26206,7 @@ namespace mg5amcCpu
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26218,7 +26218,7 @@ namespace mg5amcCpu
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26234,12 +26234,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1140 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1140
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 1140
-      helas_VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26259,7 +26259,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26279,7 +26279,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26299,7 +26299,7 @@ namespace mg5amcCpu
       jamp_sv[100] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26319,7 +26319,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26339,7 +26339,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26359,7 +26359,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26379,7 +26379,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26399,7 +26399,7 @@ namespace mg5amcCpu
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26423,12 +26423,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1141 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1141
-      helas_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] );
-      helas_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] );
-      helas_VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 1141
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26448,7 +26448,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26468,7 +26468,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26492,12 +26492,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1142 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1142
-      helas_VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      helas_VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      helas_VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_CD_VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_CD_VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] );
 
       // Amplitude(s) for diagram number 1142
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26517,7 +26517,7 @@ namespace mg5amcCpu
       jamp_sv[100] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26537,7 +26537,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26564,7 +26564,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1143
-      helas_VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26584,7 +26584,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26604,7 +26604,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26628,12 +26628,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1144 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1144
-      helas_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
-      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] );
-      helas_FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 1144
-      helas_FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26641,7 +26641,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[71] += amp_sv[0];
-      helas_FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26649,7 +26649,7 @@ namespace mg5amcCpu
       jamp_sv[68] += amp_sv[0];
       jamp_sv[69] -= amp_sv[0];
       jamp_sv[70] += amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26664,7 +26664,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1145
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26676,7 +26676,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26688,7 +26688,7 @@ namespace mg5amcCpu
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26707,7 +26707,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1146
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26715,7 +26715,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= amp_sv[0];
       jamp_sv[56] -= amp_sv[0];
       jamp_sv[62] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26723,7 +26723,7 @@ namespace mg5amcCpu
       jamp_sv[54] += amp_sv[0];
       jamp_sv[56] -= amp_sv[0];
       jamp_sv[60] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26738,7 +26738,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1147
-      helas_FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26746,7 +26746,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[113] += amp_sv[0];
-      helas_FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26754,7 +26754,7 @@ namespace mg5amcCpu
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26769,7 +26769,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1148
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26781,7 +26781,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26793,7 +26793,7 @@ namespace mg5amcCpu
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26812,7 +26812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1149
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26820,7 +26820,7 @@ namespace mg5amcCpu
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26828,7 +26828,7 @@ namespace mg5amcCpu
       jamp_sv[103] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26840,12 +26840,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1150 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1150
-      helas_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
-      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
-      helas_FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
 
       // Amplitude(s) for diagram number 1150
-      helas_FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26853,7 +26853,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[79] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26861,7 +26861,7 @@ namespace mg5amcCpu
       jamp_sv[27] += amp_sv[0];
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[73] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26876,7 +26876,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1151
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26888,7 +26888,7 @@ namespace mg5amcCpu
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26900,7 +26900,7 @@ namespace mg5amcCpu
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26919,7 +26919,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1152
-      helas_FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26927,7 +26927,7 @@ namespace mg5amcCpu
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
       jamp_sv[78] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26935,7 +26935,7 @@ namespace mg5amcCpu
       jamp_sv[26] += amp_sv[0];
       jamp_sv[36] -= amp_sv[0];
       jamp_sv[72] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26950,7 +26950,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1153
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26962,7 +26962,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26974,7 +26974,7 @@ namespace mg5amcCpu
       jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -26993,7 +26993,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1154
-      helas_FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27005,7 +27005,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[79] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27017,7 +27017,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27036,7 +27036,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1155
-      helas_FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27048,7 +27048,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27060,7 +27060,7 @@ namespace mg5amcCpu
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27076,12 +27076,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1156 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1156
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] );
 
       // Amplitude(s) for diagram number 1156
-      helas_VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27101,7 +27101,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27121,7 +27121,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      helas_VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27141,7 +27141,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      helas_VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27161,7 +27161,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27181,7 +27181,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      helas_VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27201,7 +27201,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      helas_VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27221,7 +27221,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      helas_VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27241,7 +27241,7 @@ namespace mg5amcCpu
       jamp_sv[92] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[102] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27265,12 +27265,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1157 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1157
-      helas_VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] );
-      helas_VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] );
-      helas_VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] );
+      helas_CD_VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_CD_VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 1157
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27290,7 +27290,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27310,7 +27310,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27334,12 +27334,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1158 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1158
-      helas_VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] );
-      helas_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] );
-      helas_VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 1158
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27359,7 +27359,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27379,7 +27379,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27406,7 +27406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1159
-      helas_VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27426,7 +27426,7 @@ namespace mg5amcCpu
       jamp_sv[89] += amp_sv[0];
       jamp_sv[102] += amp_sv[0];
       jamp_sv[103] -= amp_sv[0];
-      helas_VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27446,7 +27446,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[96] += amp_sv[0];
       jamp_sv[97] -= amp_sv[0];
-      helas_VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27470,12 +27470,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1160 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1160
-      helas_FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
-      helas_FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
 
       // Amplitude(s) for diagram number 1160
-      helas_FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27483,7 +27483,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[65] += amp_sv[0];
-      helas_FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27491,7 +27491,7 @@ namespace mg5amcCpu
       jamp_sv[62] += amp_sv[0];
       jamp_sv[63] -= amp_sv[0];
       jamp_sv[64] += amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27506,7 +27506,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1161
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27518,7 +27518,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27530,7 +27530,7 @@ namespace mg5amcCpu
       jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27549,7 +27549,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1162
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27557,7 +27557,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= amp_sv[0];
       jamp_sv[58] -= amp_sv[0];
       jamp_sv[68] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27565,7 +27565,7 @@ namespace mg5amcCpu
       jamp_sv[55] += amp_sv[0];
       jamp_sv[58] -= amp_sv[0];
       jamp_sv[66] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27580,7 +27580,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1163
-      helas_FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27588,7 +27588,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= amp_sv[0];
       jamp_sv[87] -= amp_sv[0];
       jamp_sv[89] += amp_sv[0];
-      helas_FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27596,7 +27596,7 @@ namespace mg5amcCpu
       jamp_sv[86] += amp_sv[0];
       jamp_sv[87] -= amp_sv[0];
       jamp_sv[88] += amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27611,7 +27611,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1164
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27623,7 +27623,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27635,7 +27635,7 @@ namespace mg5amcCpu
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27654,7 +27654,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1165
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27662,7 +27662,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= amp_sv[0];
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27670,7 +27670,7 @@ namespace mg5amcCpu
       jamp_sv[79] += amp_sv[0];
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[90] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27682,12 +27682,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1166 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1166
-      helas_FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
-      helas_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
-      helas_FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
 
       // Amplitude(s) for diagram number 1166
-      helas_FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27695,7 +27695,7 @@ namespace mg5amcCpu
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[103] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27703,7 +27703,7 @@ namespace mg5amcCpu
       jamp_sv[29] += amp_sv[0];
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[97] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27718,7 +27718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1167
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27730,7 +27730,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27742,7 +27742,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27761,7 +27761,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1168
-      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27769,7 +27769,7 @@ namespace mg5amcCpu
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
       jamp_sv[102] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27777,7 +27777,7 @@ namespace mg5amcCpu
       jamp_sv[28] += amp_sv[0];
       jamp_sv[42] -= amp_sv[0];
       jamp_sv[96] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27792,7 +27792,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1169
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27804,7 +27804,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27816,7 +27816,7 @@ namespace mg5amcCpu
       jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27835,7 +27835,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1170
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27847,7 +27847,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27859,7 +27859,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27878,7 +27878,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1171
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27890,7 +27890,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27902,7 +27902,7 @@ namespace mg5amcCpu
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27918,15 +27918,15 @@ namespace mg5amcCpu
       // *** DIAGRAM 1172 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1172
-      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] );
-      helas_FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
-      helas_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] );
 
       // Amplitude(s) for diagram number 1172
-      helas_FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27934,7 +27934,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= amp_sv[0];
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[47] += amp_sv[0];
-      helas_FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27942,7 +27942,7 @@ namespace mg5amcCpu
       jamp_sv[44] += amp_sv[0];
       jamp_sv[45] -= amp_sv[0];
       jamp_sv[46] += amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27954,12 +27954,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1173 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1173
-      helas_VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      helas_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] );
-      helas_VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
+      helas_CD_VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] );
 
       // Amplitude(s) for diagram number 1173
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27971,7 +27971,7 @@ namespace mg5amcCpu
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -27983,7 +27983,7 @@ namespace mg5amcCpu
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28002,7 +28002,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1174
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28010,7 +28010,7 @@ namespace mg5amcCpu
       jamp_sv[26] -= amp_sv[0];
       jamp_sv[32] -= amp_sv[0];
       jamp_sv[38] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28018,7 +28018,7 @@ namespace mg5amcCpu
       jamp_sv[30] += amp_sv[0];
       jamp_sv[32] -= amp_sv[0];
       jamp_sv[36] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28030,12 +28030,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1175 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1175
-      helas_FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
-      helas_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
-      helas_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 1175
-      helas_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28043,7 +28043,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[85] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28051,7 +28051,7 @@ namespace mg5amcCpu
       jamp_sv[51] += amp_sv[0];
       jamp_sv[61] -= amp_sv[0];
       jamp_sv[75] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28066,7 +28066,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1176
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28078,7 +28078,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28090,7 +28090,7 @@ namespace mg5amcCpu
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28109,7 +28109,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1177
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28117,7 +28117,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28125,7 +28125,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28140,7 +28140,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1178
-      helas_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28152,7 +28152,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28164,7 +28164,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28183,7 +28183,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1179
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28195,7 +28195,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28207,7 +28207,7 @@ namespace mg5amcCpu
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28226,7 +28226,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1180
-      helas_VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28246,7 +28246,7 @@ namespace mg5amcCpu
       jamp_sv[103] += amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28266,7 +28266,7 @@ namespace mg5amcCpu
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[105] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28293,7 +28293,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1181
-      helas_VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28313,7 +28313,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28333,7 +28333,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      helas_VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28353,7 +28353,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28373,7 +28373,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28393,7 +28393,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28413,7 +28413,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28433,7 +28433,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28453,7 +28453,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28477,12 +28477,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1182 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1182
-      helas_VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      helas_VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      helas_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_CD_VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_CD_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 1182
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28502,7 +28502,7 @@ namespace mg5amcCpu
       jamp_sv[107] += amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[118] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28522,7 +28522,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= amp_sv[0];
       jamp_sv[112] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28549,7 +28549,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1183
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28569,7 +28569,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[118] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28589,7 +28589,7 @@ namespace mg5amcCpu
       jamp_sv[109] += amp_sv[0];
       jamp_sv[112] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28616,7 +28616,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1184
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28628,7 +28628,7 @@ namespace mg5amcCpu
       jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28640,7 +28640,7 @@ namespace mg5amcCpu
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28659,7 +28659,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1185
-      helas_FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28667,7 +28667,7 @@ namespace mg5amcCpu
       jamp_sv[103] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[107] += amp_sv[0];
-      helas_FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28675,7 +28675,7 @@ namespace mg5amcCpu
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28690,7 +28690,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1186
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28702,7 +28702,7 @@ namespace mg5amcCpu
       jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28714,7 +28714,7 @@ namespace mg5amcCpu
       jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28733,7 +28733,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1187
-      helas_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28741,7 +28741,7 @@ namespace mg5amcCpu
       jamp_sv[14] -= amp_sv[0];
       jamp_sv[60] -= amp_sv[0];
       jamp_sv[84] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28749,7 +28749,7 @@ namespace mg5amcCpu
       jamp_sv[50] += amp_sv[0];
       jamp_sv[60] -= amp_sv[0];
       jamp_sv[74] += amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28761,15 +28761,15 @@ namespace mg5amcCpu
       // *** DIAGRAM 1188 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1188
-      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] );
-      helas_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
-      helas_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
-      helas_FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] );
 
       // Amplitude(s) for diagram number 1188
-      helas_FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28777,7 +28777,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= amp_sv[0];
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[41] += amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28785,7 +28785,7 @@ namespace mg5amcCpu
       jamp_sv[38] += amp_sv[0];
       jamp_sv[39] -= amp_sv[0];
       jamp_sv[40] += amp_sv[0];
-      helas_FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28797,12 +28797,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1189 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1189
-      helas_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] );
-      helas_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] );
-      helas_VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] );
+      helas_CD_VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_CD_VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 1189
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28814,7 +28814,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28826,7 +28826,7 @@ namespace mg5amcCpu
       jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28845,7 +28845,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1190
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28853,7 +28853,7 @@ namespace mg5amcCpu
       jamp_sv[28] -= amp_sv[0];
       jamp_sv[34] -= amp_sv[0];
       jamp_sv[44] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28861,7 +28861,7 @@ namespace mg5amcCpu
       jamp_sv[31] += amp_sv[0];
       jamp_sv[34] -= amp_sv[0];
       jamp_sv[42] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28873,12 +28873,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1191 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1191
-      helas_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
-      helas_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
-      helas_FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 1191
-      helas_FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28886,7 +28886,7 @@ namespace mg5amcCpu
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28894,7 +28894,7 @@ namespace mg5amcCpu
       jamp_sv[53] += amp_sv[0];
       jamp_sv[67] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28909,7 +28909,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1192
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28921,7 +28921,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28933,7 +28933,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28952,7 +28952,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1193
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28960,7 +28960,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28968,7 +28968,7 @@ namespace mg5amcCpu
       jamp_sv[85] += amp_sv[0];
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28983,7 +28983,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1194
-      helas_FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -28995,7 +28995,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29007,7 +29007,7 @@ namespace mg5amcCpu
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29026,7 +29026,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1195
-      helas_FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29038,7 +29038,7 @@ namespace mg5amcCpu
       jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29050,7 +29050,7 @@ namespace mg5amcCpu
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29069,7 +29069,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1196
-      helas_VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29089,7 +29089,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29109,7 +29109,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[98] -= amp_sv[0];
       jamp_sv[99] += amp_sv[0];
-      helas_VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29136,7 +29136,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1197
-      helas_VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29156,7 +29156,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      helas_VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29176,7 +29176,7 @@ namespace mg5amcCpu
       jamp_sv[83] += amp_sv[0];
       jamp_sv[108] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29196,7 +29196,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29216,7 +29216,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29236,7 +29236,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29256,7 +29256,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29276,7 +29276,7 @@ namespace mg5amcCpu
       jamp_sv[94] += amp_sv[0];
       jamp_sv[98] += amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29296,7 +29296,7 @@ namespace mg5amcCpu
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[108] -= amp_sv[0];
       jamp_sv[109] += amp_sv[0];
-      helas_VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29320,12 +29320,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1198 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1198
-      helas_VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] );
-      helas_VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
-      helas_VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] );
+      helas_CD_VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] );
+      helas_CD_VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] );
 
       // Amplitude(s) for diagram number 1198
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29345,7 +29345,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[94] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29365,7 +29365,7 @@ namespace mg5amcCpu
       jamp_sv[88] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29392,7 +29392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1199
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29412,7 +29412,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[94] += amp_sv[0];
       jamp_sv[109] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29432,7 +29432,7 @@ namespace mg5amcCpu
       jamp_sv[88] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29459,7 +29459,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1200
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29471,7 +29471,7 @@ namespace mg5amcCpu
       jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29483,7 +29483,7 @@ namespace mg5amcCpu
       jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29502,7 +29502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1201
-      helas_FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29510,7 +29510,7 @@ namespace mg5amcCpu
       jamp_sv[79] -= amp_sv[0];
       jamp_sv[81] -= amp_sv[0];
       jamp_sv[83] += amp_sv[0];
-      helas_FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29518,7 +29518,7 @@ namespace mg5amcCpu
       jamp_sv[80] += amp_sv[0];
       jamp_sv[81] -= amp_sv[0];
       jamp_sv[82] += amp_sv[0];
-      helas_FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29533,7 +29533,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1202
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29545,7 +29545,7 @@ namespace mg5amcCpu
       jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29557,7 +29557,7 @@ namespace mg5amcCpu
       jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29576,7 +29576,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1203
-      helas_FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29584,7 +29584,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= amp_sv[0];
       jamp_sv[66] -= amp_sv[0];
       jamp_sv[108] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29592,7 +29592,7 @@ namespace mg5amcCpu
       jamp_sv[52] += amp_sv[0];
       jamp_sv[66] -= amp_sv[0];
       jamp_sv[98] += amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29604,15 +29604,15 @@ namespace mg5amcCpu
       // *** DIAGRAM 1204 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1204
-      helas_VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] );
-      helas_FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
-      helas_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
-      helas_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] );
 
       // Amplitude(s) for diagram number 1204
-      helas_FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29620,7 +29620,7 @@ namespace mg5amcCpu
       jamp_sv[31] -= amp_sv[0];
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[35] += amp_sv[0];
-      helas_FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29628,7 +29628,7 @@ namespace mg5amcCpu
       jamp_sv[32] += amp_sv[0];
       jamp_sv[33] -= amp_sv[0];
       jamp_sv[34] += amp_sv[0];
-      helas_FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29640,12 +29640,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1205 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1205
-      helas_VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] );
-      helas_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] );
-      helas_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] );
+      helas_CD_VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] );
+      helas_CD_VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 1205
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29657,7 +29657,7 @@ namespace mg5amcCpu
       jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29669,7 +29669,7 @@ namespace mg5amcCpu
       jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29688,7 +29688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1206
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29696,7 +29696,7 @@ namespace mg5amcCpu
       jamp_sv[29] -= amp_sv[0];
       jamp_sv[40] -= amp_sv[0];
       jamp_sv[46] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29704,7 +29704,7 @@ namespace mg5amcCpu
       jamp_sv[37] += amp_sv[0];
       jamp_sv[40] -= amp_sv[0];
       jamp_sv[43] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29716,12 +29716,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1207 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1207
-      helas_FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
-      helas_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] );
 
       // Amplitude(s) for diagram number 1207
-      helas_FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29729,7 +29729,7 @@ namespace mg5amcCpu
       jamp_sv[23] -= amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29737,7 +29737,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29752,7 +29752,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1208
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29764,7 +29764,7 @@ namespace mg5amcCpu
       jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29776,7 +29776,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29795,7 +29795,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1209
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29803,7 +29803,7 @@ namespace mg5amcCpu
       jamp_sv[53] -= amp_sv[0];
       jamp_sv[64] -= amp_sv[0];
       jamp_sv[70] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29811,7 +29811,7 @@ namespace mg5amcCpu
       jamp_sv[61] += amp_sv[0];
       jamp_sv[64] -= amp_sv[0];
       jamp_sv[67] += amp_sv[0];
-      helas_FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29826,7 +29826,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1210
-      helas_FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29838,7 +29838,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29850,7 +29850,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29869,7 +29869,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1211
-      helas_FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29881,7 +29881,7 @@ namespace mg5amcCpu
       jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29893,7 +29893,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29912,7 +29912,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1212
-      helas_VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29932,7 +29932,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29952,7 +29952,7 @@ namespace mg5amcCpu
       jamp_sv[91] -= amp_sv[0];
       jamp_sv[100] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29979,7 +29979,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1213
-      helas_VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -29999,7 +29999,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      helas_VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30019,7 +30019,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[114] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30039,7 +30039,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30059,7 +30059,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30079,7 +30079,7 @@ namespace mg5amcCpu
       jamp_sv[91] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30099,7 +30099,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30119,7 +30119,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[100] += amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30139,7 +30139,7 @@ namespace mg5amcCpu
       jamp_sv[101] -= amp_sv[0];
       jamp_sv[114] -= amp_sv[0];
       jamp_sv[115] += amp_sv[0];
-      helas_VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30163,12 +30163,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1214 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1214
-      helas_VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] );
-      helas_VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] );
-      helas_VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] );
+      helas_CD_VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] );
+      helas_CD_VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] );
 
       // Amplitude(s) for diagram number 1214
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30188,7 +30188,7 @@ namespace mg5amcCpu
       jamp_sv[70] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30208,7 +30208,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30235,7 +30235,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1215
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30255,7 +30255,7 @@ namespace mg5amcCpu
       jamp_sv[70] += amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[115] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30275,7 +30275,7 @@ namespace mg5amcCpu
       jamp_sv[77] -= amp_sv[0];
       jamp_sv[91] += amp_sv[0];
       jamp_sv[101] -= amp_sv[0];
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30302,7 +30302,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1216
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30314,7 +30314,7 @@ namespace mg5amcCpu
       jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30326,7 +30326,7 @@ namespace mg5amcCpu
       jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30345,7 +30345,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1217
-      helas_FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30353,7 +30353,7 @@ namespace mg5amcCpu
       jamp_sv[55] -= amp_sv[0];
       jamp_sv[57] -= amp_sv[0];
       jamp_sv[59] += amp_sv[0];
-      helas_FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30361,7 +30361,7 @@ namespace mg5amcCpu
       jamp_sv[56] += amp_sv[0];
       jamp_sv[57] -= amp_sv[0];
       jamp_sv[58] += amp_sv[0];
-      helas_FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30376,7 +30376,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1218
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30388,7 +30388,7 @@ namespace mg5amcCpu
       jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30400,7 +30400,7 @@ namespace mg5amcCpu
       jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30419,7 +30419,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1219
-      helas_FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30427,7 +30427,7 @@ namespace mg5amcCpu
       jamp_sv[22] -= amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[114] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30435,7 +30435,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[90] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30450,7 +30450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1220
-      helas_VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30470,7 +30470,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30490,7 +30490,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30510,7 +30510,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30530,7 +30530,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30550,7 +30550,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30570,7 +30570,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30590,7 +30590,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30610,7 +30610,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[117] += amp_sv[0];
       jamp_sv[119] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30634,12 +30634,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1221 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1221
-      helas_VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] );
-      helas_VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] );
-      helas_VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 1221
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30659,7 +30659,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30679,7 +30679,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30706,7 +30706,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1222
-      helas_VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30726,7 +30726,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[119] += amp_sv[0];
-      helas_VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30746,7 +30746,7 @@ namespace mg5amcCpu
       jamp_sv[111] += amp_sv[0];
       jamp_sv[113] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30773,7 +30773,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1223
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30785,7 +30785,7 @@ namespace mg5amcCpu
       jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30797,7 +30797,7 @@ namespace mg5amcCpu
       jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30816,7 +30816,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1224
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30824,7 +30824,7 @@ namespace mg5amcCpu
       jamp_sv[97] -= amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[101] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30832,7 +30832,7 @@ namespace mg5amcCpu
       jamp_sv[98] += amp_sv[0];
       jamp_sv[99] -= amp_sv[0];
       jamp_sv[100] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30847,7 +30847,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1225
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30859,7 +30859,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30871,7 +30871,7 @@ namespace mg5amcCpu
       jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30890,7 +30890,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1226
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30898,7 +30898,7 @@ namespace mg5amcCpu
       jamp_sv[38] -= amp_sv[0];
       jamp_sv[62] -= amp_sv[0];
       jamp_sv[86] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30906,7 +30906,7 @@ namespace mg5amcCpu
       jamp_sv[56] += amp_sv[0];
       jamp_sv[62] -= amp_sv[0];
       jamp_sv[80] += amp_sv[0];
-      helas_FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30921,7 +30921,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1227
-      helas_VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30941,7 +30941,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30961,7 +30961,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -30981,7 +30981,7 @@ namespace mg5amcCpu
       jamp_sv[77] += amp_sv[0];
       jamp_sv[110] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31001,7 +31001,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31021,7 +31021,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31041,7 +31041,7 @@ namespace mg5amcCpu
       jamp_sv[76] += amp_sv[0];
       jamp_sv[104] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31061,7 +31061,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31081,7 +31081,7 @@ namespace mg5amcCpu
       jamp_sv[95] -= amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
       jamp_sv[111] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31105,12 +31105,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1228 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1228
-      helas_VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] );
-      helas_VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] );
-      helas_VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] );
 
       // Amplitude(s) for diagram number 1228
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31130,7 +31130,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[110] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31150,7 +31150,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[104] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31177,7 +31177,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1229
-      helas_VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31197,7 +31197,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[95] += amp_sv[0];
       jamp_sv[111] -= amp_sv[0];
-      helas_VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31217,7 +31217,7 @@ namespace mg5amcCpu
       jamp_sv[89] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[105] -= amp_sv[0];
-      helas_VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31244,7 +31244,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1230
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31256,7 +31256,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31268,7 +31268,7 @@ namespace mg5amcCpu
       jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31287,7 +31287,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1231
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31295,7 +31295,7 @@ namespace mg5amcCpu
       jamp_sv[73] -= amp_sv[0];
       jamp_sv[75] -= amp_sv[0];
       jamp_sv[77] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31303,7 +31303,7 @@ namespace mg5amcCpu
       jamp_sv[74] += amp_sv[0];
       jamp_sv[75] -= amp_sv[0];
       jamp_sv[76] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31318,7 +31318,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1232
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31330,7 +31330,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31342,7 +31342,7 @@ namespace mg5amcCpu
       jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31361,7 +31361,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1233
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31369,7 +31369,7 @@ namespace mg5amcCpu
       jamp_sv[44] -= amp_sv[0];
       jamp_sv[68] -= amp_sv[0];
       jamp_sv[110] += amp_sv[0];
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31377,7 +31377,7 @@ namespace mg5amcCpu
       jamp_sv[58] += amp_sv[0];
       jamp_sv[68] -= amp_sv[0];
       jamp_sv[104] += amp_sv[0];
-      helas_FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31392,7 +31392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1234
-      helas_VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31412,7 +31412,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31432,7 +31432,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31452,7 +31452,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[116] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31472,7 +31472,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31492,7 +31492,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31512,7 +31512,7 @@ namespace mg5amcCpu
       jamp_sv[93] += amp_sv[0];
       jamp_sv[106] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31532,7 +31532,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31552,7 +31552,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
       jamp_sv[117] += amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31576,12 +31576,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 1235 OF 1240 ***
 
       // Wavefunction(s) for diagram number 1235
-      helas_VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] );
-      helas_VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] );
-      helas_VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] );
 
       // Amplitude(s) for diagram number 1235
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31601,7 +31601,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[116] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31621,7 +31621,7 @@ namespace mg5amcCpu
       jamp_sv[82] -= amp_sv[0];
       jamp_sv[92] += amp_sv[0];
       jamp_sv[106] -= amp_sv[0];
-      helas_VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31648,7 +31648,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1236
-      helas_VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31668,7 +31668,7 @@ namespace mg5amcCpu
       jamp_sv[71] += amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[117] -= amp_sv[0];
-      helas_VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31688,7 +31688,7 @@ namespace mg5amcCpu
       jamp_sv[83] -= amp_sv[0];
       jamp_sv[93] += amp_sv[0];
       jamp_sv[107] -= amp_sv[0];
-      helas_VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31715,7 +31715,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1237
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31727,7 +31727,7 @@ namespace mg5amcCpu
       jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31739,7 +31739,7 @@ namespace mg5amcCpu
       jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31758,7 +31758,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1238
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31766,7 +31766,7 @@ namespace mg5amcCpu
       jamp_sv[49] -= amp_sv[0];
       jamp_sv[51] -= amp_sv[0];
       jamp_sv[53] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31774,7 +31774,7 @@ namespace mg5amcCpu
       jamp_sv[50] += amp_sv[0];
       jamp_sv[51] -= amp_sv[0];
       jamp_sv[52] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31789,7 +31789,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1239
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31801,7 +31801,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31813,7 +31813,7 @@ namespace mg5amcCpu
       jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31832,7 +31832,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 1240
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31840,7 +31840,7 @@ namespace mg5amcCpu
       jamp_sv[46] -= amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[116] += amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -31848,7 +31848,7 @@ namespace mg5amcCpu
       jamp_sv[82] += amp_sv[0];
       jamp_sv[92] -= amp_sv[0];
       jamp_sv[106] += amp_sv[0];
-      helas_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h
index 24e8114e3a..624de4a7b3 100644
--- a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h
@@ -1404,178 +1404,347 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1_0 linker_VVV1_0
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVVV1_0 linker_VVVV1_0
-#define helas_VVVV1P0_1 linker_VVVV1P0_1
-#define helas_VVVV3_0 linker_VVVV3_0
-#define helas_VVVV3P0_1 linker_VVVV3P0_1
-#define helas_VVVV4_0 linker_VVVV4_0
-#define helas_VVVV4P0_1 linker_VVVV4P0_1
+#define helas_CD_VVV1_0 linker_CD_VVV1_0
+#define helas_CI_VVV1_0 linker_CI_VVV1_0
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVVV1_0 linker_CD_VVVV1_0
+#define helas_CI_VVVV1_0 linker_CI_VVVV1_0
+#define helas_CD_VVVV1P0_1 linker_CD_VVVV1P0_1
+#define helas_CI_VVVV1P0_1 linker_CI_VVVV1P0_1
+#define helas_CD_VVVV3_0 linker_CD_VVVV3_0
+#define helas_CI_VVVV3_0 linker_CI_VVVV3_0
+#define helas_CD_VVVV3P0_1 linker_CD_VVVV3P0_1
+#define helas_CI_VVVV3P0_1 linker_CI_VVVV3P0_1
+#define helas_CD_VVVV4_0 linker_CD_VVVV4_0
+#define helas_CI_VVVV4_0 linker_CI_VVVV4_0
+#define helas_CD_VVVV4P0_1 linker_CD_VVVV4P0_1
+#define helas_CI_VVVV4P0_1 linker_CI_VVVV4P0_1
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV3_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_VVVV4_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allV4[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
index a5d8f488eb..74a5555444 100644
--- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
@@ -61,7 +61,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005703926086425781 [0m
+[1;32mDEBUG: model prefixing  takes 0.0056743621826171875 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.080 s
+8 processes with 40 diagrams generated in 0.081 s
 Total: 8 processes with 40 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -200,8 +200,8 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~
 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Creating files in directory P1_gu_ttxu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fead16eab20> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f794f66ab20> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -217,12 +217,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxu 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P1_gux_ttxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fead151d370> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f794f49d370> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -238,11 +238,11 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s
-Wrote files for 32 helas calls in 0.258 s
+Wrote files for 32 helas calls in 0.255 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
@@ -250,7 +250,7 @@ ALOHA: aloha creates 2 routines in  0.152 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.136 s
+ALOHA: aloha creates 4 routines in  0.140 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -298,10 +298,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.833s
-user	0m2.015s
-sys	0m0.320s
-Code generation completed in 2 seconds
+real	0m2.337s
+user	0m2.045s
+sys	0m0.286s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/HelAmps.cc
index b1b8a27d42..fce6598138 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/HelAmps.cc
@@ -62,77 +62,150 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
   {
     return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
index 1eba0f4747..dd30823b89 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
@@ -337,11 +337,11 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      helas_FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -352,11 +352,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -367,10 +367,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -381,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -398,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
index e6590d28ce..0680f3eac0 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
@@ -337,11 +337,11 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 );
 
-      helas_FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -352,11 +352,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -367,10 +367,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -381,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -398,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h
index 496d8197a9..8f3a882684 100644
--- a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h
@@ -1077,77 +1077,145 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVV1_0 linker_VVV1_0
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVV1_0 linker_CD_VVV1_0
+#define helas_CI_VVV1_0 linker_CI_VVV1_0
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
index 269301a91d..fd260607af 100644
--- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
@@ -61,7 +61,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0057561397552490234 [0m
+[1;32mDEBUG: model prefixing  takes 0.005650043487548828 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.081 s
+8 processes with 40 diagrams generated in 0.080 s
 Total: 8 processes with 40 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq
 Load PLUGIN.CUDACPP_OUTPUT
@@ -214,7 +214,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.149 s
+ALOHA: aloha creates 2 routines in  0.150 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -233,6 +233,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/
 quit
 
 real	0m0.680s
-user	0m0.618s
-sys	0m0.056s
+user	0m0.620s
+sys	0m0.051s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/HelAmps.cc
index b1b8a27d42..fce6598138 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/HelAmps.cc
@@ -62,77 +62,150 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
   {
     return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc
index fd5a6dd91d..1ae98649ad 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc
@@ -337,11 +337,11 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      helas_FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -351,11 +351,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -365,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -378,10 +378,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -394,7 +394,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc
index 6393fe7844..40de8ac97a 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc
@@ -337,11 +337,11 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 );
 
-      helas_FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -351,11 +351,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -365,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -378,10 +378,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -394,7 +394,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h
index 496d8197a9..8f3a882684 100644
--- a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h
+++ b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h
@@ -1077,77 +1077,145 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVV1_0 linker_VVV1_0
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVV1_0 linker_CD_VVV1_0
+#define helas_CI_VVV1_0 linker_CI_VVV1_0
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
index c7dccb0dd3..721fa8b560 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
@@ -149,8 +149,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Creating files in directory P1_gg_bbx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fb17051fc70> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2ecd94bc70> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -166,23 +166,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_bbx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses/P1_gg_bbx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s
-Wrote files for 12 helas calls in 0.123 s
+Wrote files for 12 helas calls in 0.122 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.272 s
+ALOHA: aloha creates 4 routines in  0.277 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 8 routines in  0.255 s
+ALOHA: aloha creates 8 routines in  0.261 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -221,9 +221,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.226s
-user	0m1.956s
-sys	0m0.270s
+real	0m2.240s
+user	0m1.968s
+sys	0m0.266s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/HelAmps.cc
index 649ad4ab18..d2f3f61522 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/HelAmps.cc
@@ -62,92 +62,180 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6]
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6] (dependent couplings)
   __device__ void
-  linker_VVS3_3( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allS3[] )
+  linker_CD_VVS3_3( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] )
   {
     return VVS3_3<W_ACCESS, CD_ACCESS>( allV1, allV2, allCOUP, Ccoeff, M3, W3, allS3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVS3_3( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] )
+  {
+    return VVS3_3<W_ACCESS, CI_ACCESS>( allV1, allV2, allCOUP, Ccoeff, M3, W3, allS3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
     return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6] (dependent couplings)
   __device__ void
-  linker_FFS2_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFS2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFS2_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allS3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFS2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFS2_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allS3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc
index 538b157bc0..4673604015 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc
@@ -332,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      helas_VVS3_3( w_fp[0], w_fp[1], COUPs[0], 1.0, cIPD[1], cIPD[2], w_fp[4] );
+      helas_CD_VVS3_3( w_fp[0], w_fp[1], COUPs[0], 1.0, cIPD[1], cIPD[2], w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFS2_0( w_fp[3], w_fp[2], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
+      helas_CI_FFS2_0( w_fp[3], w_fp[2], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -345,10 +345,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 4 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -359,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 4 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -372,10 +372,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 4 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h
index adae1c7f83..e500735267 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h
+++ b/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h
@@ -1120,91 +1120,173 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVS3_3 VVS3_3<W_ACCESS, CD_ACCESS>
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFS2_0 FFS2_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CD_VVS3_3 VVS3_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVS3_3 VVS3_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFS2_0 FFS2_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFS2_0 FFS2_0<W_ACCESS, A_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVS3_3 linker_VVS3_3
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFS2_0 linker_FFS2_0
+#define helas_CD_VVS3_3 linker_CD_VVS3_3
+#define helas_CI_VVS3_3 linker_CI_VVS3_3
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFS2_0 linker_CD_FFS2_0
+#define helas_CI_FFS2_0 linker_CI_FFS2_0
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6]
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6] (dependent couplings)
   __device__ void
-  linker_VVS3_3( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allS3[] );
+  linker_CD_VVS3_3( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVS3_3( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFS2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6] (independent couplings)
   __device__ void
-  linker_FFS2_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CI_FFS2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
index 12eb55167f..7a3a1de366 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
@@ -156,7 +156,7 @@ ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.275 s
+ALOHA: aloha creates 4 routines in  0.271 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -175,7 +175,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
 quit
 
-real	0m0.668s
-user	0m0.605s
+real	0m0.673s
+user	0m0.602s
 sys	0m0.058s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/HelAmps.cc
index 649ad4ab18..d2f3f61522 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/HelAmps.cc
@@ -62,92 +62,180 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6]
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6] (dependent couplings)
   __device__ void
-  linker_VVS3_3( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allS3[] )
+  linker_CD_VVS3_3( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] )
   {
     return VVS3_3<W_ACCESS, CD_ACCESS>( allV1, allV2, allCOUP, Ccoeff, M3, W3, allS3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVS3_3( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] )
+  {
+    return VVS3_3<W_ACCESS, CI_ACCESS>( allV1, allV2, allCOUP, Ccoeff, M3, W3, allS3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
     return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6] (dependent couplings)
   __device__ void
-  linker_FFS2_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFS2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFS2_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allS3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFS2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFS2_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allS3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc
index 437dbf6116..e15ad3ec69 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc
+++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc
@@ -332,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      helas_VVS3_3( w_fp[0], w_fp[1], COUPs[0], 1.0, cIPD[1], cIPD[2], w_fp[4] );
+      helas_CD_VVS3_3( w_fp[0], w_fp[1], COUPs[0], 1.0, cIPD[1], cIPD[2], w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFS2_0( w_fp[3], w_fp[2], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
+      helas_CI_FFS2_0( w_fp[3], w_fp[2], w_fp[4], COUPs[ndcoup + 0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -344,10 +344,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 4 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -357,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 4 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -369,10 +369,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 4 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[2], 1.0, cIPD[0], 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h
index adae1c7f83..e500735267 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h
+++ b/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h
@@ -1120,91 +1120,173 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVS3_3 VVS3_3<W_ACCESS, CD_ACCESS>
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFS2_0 FFS2_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CD_VVS3_3 VVS3_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVS3_3 VVS3_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFS2_0 FFS2_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFS2_0 FFS2_0<W_ACCESS, A_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVS3_3 linker_VVS3_3
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFS2_0 linker_FFS2_0
+#define helas_CD_VVS3_3 linker_CD_VVS3_3
+#define helas_CI_VVS3_3 linker_CI_VVS3_3
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFS2_0 linker_CD_FFS2_0
+#define helas_CI_FFS2_0 linker_CI_FFS2_0
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6]
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6] (dependent couplings)
   __device__ void
-  linker_VVS3_3( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allS3[] );
+  linker_CD_VVS3_3( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], V2[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVS3_3( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFS2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], S3[6] (independent couplings)
   __device__ void
-  linker_FFS2_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CI_FFS2_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
index 610871ee77..fe9270d9f7 100644
--- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
+++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
@@ -61,7 +61,7 @@ set zerowidth_tchannel F
 define j = p
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005677700042724609 [0m
+[1;32mDEBUG: model prefixing  takes 0.005668163299560547 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~
 INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ 
 INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ 
 INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ 
-5 processes with 7 diagrams generated in 0.030 s
+5 processes with 7 diagrams generated in 0.031 s
 Total: 5 processes with 7 diagrams
 add process p p > t t~ j @1
 INFO: Checking for minimal orders which gives processes. 
@@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~
 INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ 
 INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ 
 INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.868 s
+65 processes with 1119 diagrams generated in 1.879 s
 Total: 83 processes with 1202 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -499,8 +499,8 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED
 INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 
 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 
 INFO: Creating files in directory P2_gg_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000cf7700> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f920155a700> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -516,12 +516,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_gg_ttxuux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013acf10> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201815490> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -537,12 +537,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_gu_ttxgu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013acf10> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201bec4f0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -558,12 +558,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_gux_ttxgux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2001001fa0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201815490> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -579,12 +579,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_uux_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013acf10> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f920155a5b0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -600,12 +600,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P1_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013acf10> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201bec4f0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -621,12 +621,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_uu_ttxuu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000f93a60> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f920183ad60> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -642,12 +642,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_uux_ttxuux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013fb3a0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f920182a9a0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -663,12 +663,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_uxux_ttxuxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000ff4940> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201bec4f0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -684,12 +684,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_uc_ttxuc 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000fc8b20> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f92018513d0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -705,12 +705,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_uux_ttxccx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013894f0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f92017f7bb0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -726,12 +726,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_ucx_ttxucx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000fe1a60> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f92017fdb80> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -747,12 +747,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P2_uxcx_ttxuxcx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000e099d0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201bec4f0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -768,12 +768,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P1_gu_ttxu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f20013910d0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201812df0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -789,12 +789,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxu 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P1_gux_ttxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000fc7430> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201bec4f0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -810,12 +810,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxux 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P1_uux_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000f93b80> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201c5eaf0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -831,12 +831,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxg 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P0_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000fd3040> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201c5eaf0> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -852,12 +852,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1626][0m [0m
 INFO: Creating files in directory P0_uux_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2000f93b80> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201851b20> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -873,25 +873,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1614][0m [0m
-Generated helas calls for 18 subprocesses (372 diagrams) in 1.332 s
-Wrote files for 810 helas calls in 3.619 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1626][0m [0m
+Generated helas calls for 18 subprocesses (372 diagrams) in 1.336 s
+Wrote files for 810 helas calls in 3.622 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.350 s
+ALOHA: aloha creates 5 routines in  0.347 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.327 s
+ALOHA: aloha creates 10 routines in  0.326 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -1051,9 +1051,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m11.676s
-user	0m10.606s
-sys	0m0.897s
+real	0m11.543s
+user	0m10.575s
+sys	0m0.930s
 Code generation completed in 12 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/HelAmps.cc
index 845cf9fd87..ebe42b3ce3 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/HelAmps.cc
@@ -62,185 +62,366 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+    return VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
-    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
-    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
-    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] )
+                    fptype allF1[] )
   {
-    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV3_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
-    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV3_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV3P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV3P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
   __device__ void
-  linker_VVVV4_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allV4[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] )
+  linker_CI_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
   {
     return VVVV4P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] )
+  {
+    return VVVV4P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allV4, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc
index 6e4e81d6bf..8617d52a7d 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc
@@ -332,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -346,10 +346,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -359,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc
index 9e3e37be16..60486b8537 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc
@@ -335,10 +335,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      helas_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
index 3fa4e019da..6ff382a87e 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc
@@ -334,11 +334,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -351,10 +351,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 16 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 16 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -379,11 +379,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 16 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -393,10 +393,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 16 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -410,7 +410,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -420,11 +420,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 16 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -437,7 +437,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -451,7 +451,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -461,10 +461,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 16 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -478,7 +478,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -492,7 +492,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      helas_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -508,7 +508,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -521,7 +521,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -534,7 +534,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -547,22 +547,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 16 ***
 
       // Wavefunction(s) for diagram number 16
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
index b2f73fb903..8bdfb8de9a 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc
@@ -337,11 +337,11 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      helas_FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -352,11 +352,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -367,10 +367,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -381,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -398,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
index 51d5ffc4ff..591bab4e33 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc
@@ -337,11 +337,11 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 );
 
-      helas_FFV1_2( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_2( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -352,11 +352,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -367,10 +367,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -381,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -398,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc
index 09af3b4355..d5aa75abab 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc
@@ -337,11 +337,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 );
 
-      helas_FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -352,11 +352,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 5 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
-      helas_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -367,10 +367,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 5 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -381,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 5 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[0], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -398,7 +398,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[7], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc
index 2f80b93422..9a656c874a 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc
@@ -336,11 +336,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
 
       // Amplitude(s) for diagram number 1
-      helas_VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -349,7 +349,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -358,7 +358,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -371,10 +371,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 123 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -391,10 +391,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 123 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -411,10 +411,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 123 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 4
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -431,11 +431,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 123 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -449,7 +449,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -462,10 +462,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 123 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -476,10 +476,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 123 ***
 
       // Wavefunction(s) for diagram number 8
-      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -493,7 +493,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -506,10 +506,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 123 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -520,10 +520,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 123 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -537,7 +537,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -553,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -567,7 +567,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -583,7 +583,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -599,7 +599,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -612,12 +612,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 123 ***
 
       // Wavefunction(s) for diagram number 17
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 17
-      helas_FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -627,10 +627,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 18 OF 123 ***
 
       // Wavefunction(s) for diagram number 18
-      helas_FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 18
-      helas_FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -643,7 +643,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 19
-      helas_FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -654,11 +654,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 20 OF 123 ***
 
       // Wavefunction(s) for diagram number 20
-      helas_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 20
-      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -674,7 +674,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 21
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -688,7 +688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -699,10 +699,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 23 OF 123 ***
 
       // Wavefunction(s) for diagram number 23
-      helas_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
+      helas_CD_VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] );
 
       // Amplitude(s) for diagram number 23
-      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -718,7 +718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 24
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -732,7 +732,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -743,10 +743,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 123 ***
 
       // Wavefunction(s) for diagram number 26
-      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] );
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -759,7 +759,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -772,7 +772,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 28
-      helas_FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -785,7 +785,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -798,7 +798,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      helas_FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -812,7 +812,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 31
-      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -825,22 +825,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 123 ***
 
       // Wavefunction(s) for diagram number 32
-      helas_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
-      helas_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
-      helas_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] );
+      helas_CD_VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] );
+      helas_CD_VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[5] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[4] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[2] += amp_sv[0];
       jamp_sv[4] += amp_sv[0];
@@ -849,12 +849,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 33 OF 123 ***
 
       // Wavefunction(s) for diagram number 33
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      helas_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -864,10 +864,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 34 OF 123 ***
 
       // Wavefunction(s) for diagram number 34
-      helas_FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 34
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -880,7 +880,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -891,10 +891,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 36 OF 123 ***
 
       // Wavefunction(s) for diagram number 36
-      helas_FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
+      helas_CD_FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 36
-      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -910,7 +910,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 37
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -924,7 +924,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -938,7 +938,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -954,7 +954,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      helas_FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -968,7 +968,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 41
-      helas_FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -979,10 +979,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 42 OF 123 ***
 
       // Wavefunction(s) for diagram number 42
-      helas_FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 42
-      helas_FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -995,7 +995,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 43
-      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1008,7 +1008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 44
-      helas_FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1021,7 +1021,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      helas_FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1034,7 +1034,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      helas_FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1048,7 +1048,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 47
-      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1064,17 +1064,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] += amp_sv[0];
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[11] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[17] -= amp_sv[0];
       jamp_sv[21] += amp_sv[0];
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[15] += amp_sv[0];
       jamp_sv[21] += amp_sv[0];
@@ -1083,11 +1083,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 49 OF 123 ***
 
       // Wavefunction(s) for diagram number 49
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
-      helas_FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 49
-      helas_FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1098,10 +1098,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 50 OF 123 ***
 
       // Wavefunction(s) for diagram number 50
-      helas_VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 50
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1117,7 +1117,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 51
-      helas_FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1128,10 +1128,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 52 OF 123 ***
 
       // Wavefunction(s) for diagram number 52
-      helas_FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 52
-      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1145,7 +1145,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1161,7 +1161,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 54
-      helas_FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1175,7 +1175,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 55
-      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1191,7 +1191,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 56
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1207,7 +1207,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      helas_VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1227,7 +1227,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 58
-      helas_VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1236,7 +1236,7 @@ namespace mg5amcCpu
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1245,7 +1245,7 @@ namespace mg5amcCpu
       jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1258,10 +1258,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 123 ***
 
       // Wavefunction(s) for diagram number 59
-      helas_VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 59
-      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1281,7 +1281,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 60
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1301,7 +1301,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1317,7 +1317,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1331,7 +1331,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 63
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1347,7 +1347,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1358,11 +1358,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 65 OF 123 ***
 
       // Wavefunction(s) for diagram number 65
-      helas_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
-      helas_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 65
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1373,10 +1373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 66 OF 123 ***
 
       // Wavefunction(s) for diagram number 66
-      helas_VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 66
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1392,7 +1392,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      helas_FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1403,10 +1403,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 68 OF 123 ***
 
       // Wavefunction(s) for diagram number 68
-      helas_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 68
-      helas_FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1420,7 +1420,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1436,7 +1436,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      helas_FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1450,7 +1450,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1466,7 +1466,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1482,7 +1482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 73
-      helas_VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1502,7 +1502,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 74
-      helas_VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1511,7 +1511,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1520,7 +1520,7 @@ namespace mg5amcCpu
       jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1533,10 +1533,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 75 OF 123 ***
 
       // Wavefunction(s) for diagram number 75
-      helas_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_CD_VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 75
-      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1556,7 +1556,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 76
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1576,7 +1576,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 77
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1592,7 +1592,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 78
-      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1606,7 +1606,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 79
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1622,7 +1622,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 80
-      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 80 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1633,10 +1633,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 81 OF 123 ***
 
       // Wavefunction(s) for diagram number 81
-      helas_FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] );
 
       // Amplitude(s) for diagram number 81
-      helas_FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1646,10 +1646,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 82 OF 123 ***
 
       // Wavefunction(s) for diagram number 82
-      helas_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 82
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1662,7 +1662,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 83
-      helas_FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1672,10 +1672,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 84 OF 123 ***
 
       // Wavefunction(s) for diagram number 84
-      helas_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
+      helas_CD_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] );
 
       // Amplitude(s) for diagram number 84
-      helas_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1688,7 +1688,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 85
-      helas_FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1699,10 +1699,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 86 OF 123 ***
 
       // Wavefunction(s) for diagram number 86
-      helas_VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] );
 
       // Amplitude(s) for diagram number 86
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1715,10 +1715,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 87 OF 123 ***
 
       // Wavefunction(s) for diagram number 87
-      helas_FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
+      helas_CD_FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] );
 
       // Amplitude(s) for diagram number 87
-      helas_FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1728,10 +1728,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 88 OF 123 ***
 
       // Wavefunction(s) for diagram number 88
-      helas_FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
+      helas_CD_FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] );
 
       // Amplitude(s) for diagram number 88
-      helas_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1744,7 +1744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 89
-      helas_FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1754,10 +1754,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 90 OF 123 ***
 
       // Wavefunction(s) for diagram number 90
-      helas_FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
+      helas_CD_FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] );
 
       // Amplitude(s) for diagram number 90
-      helas_FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 90 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1770,7 +1770,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 91
-      helas_FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1784,7 +1784,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 92
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1800,7 +1800,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 93
-      helas_VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1809,7 +1809,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1818,7 +1818,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1831,10 +1831,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 94 OF 123 ***
 
       // Wavefunction(s) for diagram number 94
-      helas_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] );
 
       // Amplitude(s) for diagram number 94
-      helas_VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1851,10 +1851,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 95 OF 123 ***
 
       // Wavefunction(s) for diagram number 95
-      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] );
 
       // Amplitude(s) for diagram number 95
-      helas_VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1874,7 +1874,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 96
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1890,7 +1890,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 97
-      helas_FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1904,7 +1904,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 98
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1920,7 +1920,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 99
-      helas_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1934,7 +1934,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 100
-      helas_VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -1943,7 +1943,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1952,7 +1952,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0];
@@ -1965,10 +1965,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 101 OF 123 ***
 
       // Wavefunction(s) for diagram number 101
-      helas_VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 101
-      helas_VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1988,7 +1988,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 102
-      helas_VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2008,7 +2008,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 103
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2024,7 +2024,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 104
-      helas_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2038,7 +2038,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 105
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2054,7 +2054,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 106
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2068,7 +2068,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 107
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2077,7 +2077,7 @@ namespace mg5amcCpu
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2086,7 +2086,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2102,7 +2102,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 108
-      helas_VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2122,7 +2122,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 109
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2142,7 +2142,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 110
-      helas_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2155,7 +2155,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 111
-      helas_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2168,7 +2168,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 112
-      helas_FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2181,7 +2181,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 113
-      helas_FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -2191,12 +2191,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 114 OF 123 ***
 
       // Wavefunction(s) for diagram number 114
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] );
 
       // Amplitude(s) for diagram number 114
-      helas_VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2205,7 +2205,7 @@ namespace mg5amcCpu
       jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2214,7 +2214,7 @@ namespace mg5amcCpu
       jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2230,17 +2230,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 115
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] += amp_sv[0];
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[23] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[19] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[21] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[18] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
       jamp_sv[22] += amp_sv[0];
@@ -2252,17 +2252,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 116
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[2] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[8] -= amp_sv[0];
       jamp_sv[12] += amp_sv[0];
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[0] -= amp_sv[0];
       jamp_sv[6] += amp_sv[0];
       jamp_sv[12] += amp_sv[0];
@@ -2271,12 +2271,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 117 OF 123 ***
 
       // Wavefunction(s) for diagram number 117
-      helas_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
 
       // Amplitude(s) for diagram number 117
-      helas_VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2285,7 +2285,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0];
@@ -2294,7 +2294,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2310,17 +2310,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 118
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] += amp_sv[0];
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[17] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[13] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[15] -= amp_sv[0];
       jamp_sv[16] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[12] -= amp_sv[0];
       jamp_sv[14] += amp_sv[0];
       jamp_sv[16] += amp_sv[0];
@@ -2332,17 +2332,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 119
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[20] += amp_sv[0];
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[4] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[10] -= amp_sv[0];
       jamp_sv[18] += amp_sv[0];
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[1] -= amp_sv[0];
       jamp_sv[7] += amp_sv[0];
       jamp_sv[18] += amp_sv[0];
@@ -2351,22 +2351,22 @@ namespace mg5amcCpu
       // *** DIAGRAM 120 OF 123 ***
 
       // Wavefunction(s) for diagram number 120
-      helas_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
-      helas_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
-      helas_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
+      helas_CD_VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] );
+      helas_CD_VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] );
+      helas_CD_VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] );
 
       // Amplitude(s) for diagram number 120
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] += amp_sv[0];
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[11] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[7] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[9] -= amp_sv[0];
       jamp_sv[10] += amp_sv[0];
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[6] -= amp_sv[0];
       jamp_sv[8] += amp_sv[0];
       jamp_sv[10] += amp_sv[0];
@@ -2378,17 +2378,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 121
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] += amp_sv[0];
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[22] += amp_sv[0];
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[5] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[16] -= amp_sv[0];
       jamp_sv[19] += amp_sv[0];
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &amp_fp[0] );
       jamp_sv[3] -= amp_sv[0];
       jamp_sv[13] += amp_sv[0];
       jamp_sv[19] += amp_sv[0];
@@ -2400,7 +2400,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 122
-      helas_VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2409,7 +2409,7 @@ namespace mg5amcCpu
       jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2418,7 +2418,7 @@ namespace mg5amcCpu
       jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2434,7 +2434,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 123
-      helas_VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2443,7 +2443,7 @@ namespace mg5amcCpu
       jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0];
@@ -2452,7 +2452,7 @@ namespace mg5amcCpu
       jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0];
-      helas_VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0];
       jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0];
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc
index 7390662111..14a0082832 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc
@@ -339,12 +339,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -357,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 36 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -373,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 36 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -389,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 36 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -405,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 36 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -421,11 +421,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 36 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -436,11 +436,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 36 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[10] );
-      helas_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -451,10 +451,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 36 ***
 
       // Wavefunction(s) for diagram number 8
-      helas_FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -465,10 +465,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 36 ***
 
       // Wavefunction(s) for diagram number 9
-      helas_FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -482,7 +482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -493,11 +493,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 36 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -508,10 +508,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 36 ***
 
       // Wavefunction(s) for diagram number 12
-      helas_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -525,7 +525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -536,10 +536,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 36 ***
 
       // Wavefunction(s) for diagram number 14
-      helas_FFV1_2( w_fp[11], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_2( w_fp[11], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -553,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -564,11 +564,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 36 ***
 
       // Wavefunction(s) for diagram number 16
-      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      helas_FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -582,7 +582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 17
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -596,7 +596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -607,10 +607,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 36 ***
 
       // Wavefunction(s) for diagram number 19
-      helas_FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 19
-      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -624,7 +624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -635,11 +635,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 36 ***
 
       // Wavefunction(s) for diagram number 21
-      helas_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
-      helas_FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 21
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -653,7 +653,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -667,7 +667,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      helas_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -678,10 +678,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 36 ***
 
       // Wavefunction(s) for diagram number 24
-      helas_FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 24
-      helas_FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -695,7 +695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -706,10 +706,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 36 ***
 
       // Wavefunction(s) for diagram number 26
-      helas_FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -720,10 +720,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 36 ***
 
       // Wavefunction(s) for diagram number 27
-      helas_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -734,10 +734,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 36 ***
 
       // Wavefunction(s) for diagram number 28
-      helas_FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 28
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -751,7 +751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -762,10 +762,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 30 OF 36 ***
 
       // Wavefunction(s) for diagram number 30
-      helas_FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 30
-      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -776,10 +776,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 36 ***
 
       // Wavefunction(s) for diagram number 31
-      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 31
-      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -790,10 +790,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 36 ***
 
       // Wavefunction(s) for diagram number 32
-      helas_FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -807,7 +807,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -821,17 +821,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += 1. / 2. * amp_sv[0];
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[9] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[6] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[6] -= 1. / 2. * amp_sv[0];
@@ -843,7 +843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_VVV1_0( w_fp[1], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -859,7 +859,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      helas_VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc
index 6c2056c725..105764608f 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc
@@ -339,12 +339,12 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[1], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -357,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 36 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -373,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 36 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -389,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 36 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -405,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 36 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -421,11 +421,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 36 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -436,11 +436,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 36 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_1( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
-      helas_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_1( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[1], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -451,10 +451,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 36 ***
 
       // Wavefunction(s) for diagram number 8
-      helas_FFV1_2( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[12], w_fp[5], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[5], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -465,10 +465,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 36 ***
 
       // Wavefunction(s) for diagram number 9
-      helas_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -482,7 +482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -493,11 +493,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 36 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -508,10 +508,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 36 ***
 
       // Wavefunction(s) for diagram number 12
-      helas_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[1], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -525,7 +525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[12], w_fp[5], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[5], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -536,10 +536,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 36 ***
 
       // Wavefunction(s) for diagram number 14
-      helas_FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -553,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -564,11 +564,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 36 ***
 
       // Wavefunction(s) for diagram number 16
-      helas_FFV1_1( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      helas_FFV1P0_3( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1_1( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -582,7 +582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 17
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -596,7 +596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -607,10 +607,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 36 ***
 
       // Wavefunction(s) for diagram number 19
-      helas_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 19
-      helas_FFV1_0( w_fp[1], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -624,7 +624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -635,11 +635,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 36 ***
 
       // Wavefunction(s) for diagram number 21
-      helas_FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
-      helas_FFV1P0_3( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1P0_3( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 21
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -653,7 +653,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -667,7 +667,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      helas_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -678,10 +678,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 36 ***
 
       // Wavefunction(s) for diagram number 24
-      helas_FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 24
-      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -695,7 +695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -706,10 +706,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 36 ***
 
       // Wavefunction(s) for diagram number 26
-      helas_FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -720,10 +720,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 36 ***
 
       // Wavefunction(s) for diagram number 27
-      helas_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -734,10 +734,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 36 ***
 
       // Wavefunction(s) for diagram number 28
-      helas_FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 28
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -751,7 +751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -762,10 +762,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 30 OF 36 ***
 
       // Wavefunction(s) for diagram number 30
-      helas_FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 30
-      helas_FFV1_0( w_fp[1], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -776,10 +776,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 36 ***
 
       // Wavefunction(s) for diagram number 31
-      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 31
-      helas_FFV1_0( w_fp[1], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -790,10 +790,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 36 ***
 
       // Wavefunction(s) for diagram number 32
-      helas_FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -807,7 +807,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[12], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -821,17 +821,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      helas_VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[5] += 1. / 2. * amp_sv[0];
       jamp_sv[8] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += 1. / 2. * amp_sv[0];
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
       jamp_sv[8] -= 1. / 2. * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
@@ -843,7 +843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -859,7 +859,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc
index 185b8b5c09..c926fdf932 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc
@@ -339,12 +339,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -357,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 36 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -373,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 36 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -389,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 36 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -405,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 36 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -421,11 +421,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 36 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -436,11 +436,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 36 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
-      helas_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -451,10 +451,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 36 ***
 
       // Wavefunction(s) for diagram number 8
-      helas_FFV1_2( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -465,10 +465,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 36 ***
 
       // Wavefunction(s) for diagram number 9
-      helas_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -482,7 +482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -493,11 +493,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 36 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -508,10 +508,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 36 ***
 
       // Wavefunction(s) for diagram number 12
-      helas_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -525,7 +525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -536,10 +536,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 36 ***
 
       // Wavefunction(s) for diagram number 14
-      helas_FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -553,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -564,11 +564,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 36 ***
 
       // Wavefunction(s) for diagram number 16
-      helas_FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      helas_FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -582,7 +582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 17
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -596,7 +596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -607,10 +607,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 36 ***
 
       // Wavefunction(s) for diagram number 19
-      helas_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 19
-      helas_FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -624,7 +624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -635,11 +635,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 36 ***
 
       // Wavefunction(s) for diagram number 21
-      helas_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
-      helas_FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 21
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -653,7 +653,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -667,7 +667,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      helas_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -678,10 +678,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 36 ***
 
       // Wavefunction(s) for diagram number 24
-      helas_FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 24
-      helas_FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -695,7 +695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -706,10 +706,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 36 ***
 
       // Wavefunction(s) for diagram number 26
-      helas_FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -720,10 +720,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 36 ***
 
       // Wavefunction(s) for diagram number 27
-      helas_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -734,10 +734,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 36 ***
 
       // Wavefunction(s) for diagram number 28
-      helas_FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 28
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -751,7 +751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -762,10 +762,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 30 OF 36 ***
 
       // Wavefunction(s) for diagram number 30
-      helas_FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 30
-      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -776,10 +776,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 36 ***
 
       // Wavefunction(s) for diagram number 31
-      helas_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 31
-      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -790,10 +790,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 36 ***
 
       // Wavefunction(s) for diagram number 32
-      helas_FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -807,7 +807,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -821,17 +821,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      helas_VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[4] -= 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
       jamp_sv[8] -= 1. / 2. * amp_sv[0];
       jamp_sv[11] += 1. / 2. * amp_sv[0];
-      helas_VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= 1. / 2. * amp_sv[0];
       jamp_sv[3] -= 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
       jamp_sv[11] += 1. / 2. * amp_sv[0];
-      helas_VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= 1. / 2. * amp_sv[0];
       jamp_sv[3] -= 1. / 2. * amp_sv[0];
       jamp_sv[4] += 1. / 2. * amp_sv[0];
@@ -843,7 +843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -859,7 +859,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      helas_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc
index 676ae8fe28..e8b1f58646 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc
@@ -341,12 +341,12 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      helas_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -359,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 7 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -375,10 +375,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 7 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -389,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 7 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[3], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -405,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 7 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[1], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -421,10 +421,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 7 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -437,10 +437,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 7 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc
index fb39071e9a..ae2e7754a6 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc
@@ -347,12 +347,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      helas_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 7 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -381,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 7 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -395,10 +395,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 7 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -411,10 +411,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 7 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -427,10 +427,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 7 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -443,10 +443,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 7 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc
index f941fbd814..fe84d8319f 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc
@@ -339,12 +339,12 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      helas_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -357,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 14 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -373,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 14 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -387,10 +387,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 14 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[9], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -403,10 +403,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 14 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[1], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -419,12 +419,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 14 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1P0_3( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      helas_FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -437,10 +437,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 14 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -456,7 +456,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -467,10 +467,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 14 ***
 
       // Wavefunction(s) for diagram number 9
-      helas_FFV1_2( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -483,10 +483,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 14 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_FFV1_1( w_fp[4], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[1], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -499,10 +499,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 14 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[10], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[5], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -515,10 +515,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 14 ***
 
       // Wavefunction(s) for diagram number 12
-      helas_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[10], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[5], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -531,10 +531,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 14 ***
 
       // Wavefunction(s) for diagram number 13
-      helas_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[6], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -550,7 +550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc
index b96c2a88fa..d26fff01c7 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc
@@ -347,12 +347,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      helas_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,10 +365,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 7 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -381,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 7 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -395,10 +395,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 7 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -411,10 +411,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 7 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -427,10 +427,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 7 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -443,10 +443,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 7 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[3], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc
index 62b6dc7eef..08f3cf627b 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc
@@ -339,12 +339,12 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      helas_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -357,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 36 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -373,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 36 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -389,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 36 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -405,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 36 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -421,11 +421,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 36 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
-      helas_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -436,11 +436,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 36 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_1( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[10] );
-      helas_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[0], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[10], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -451,10 +451,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 36 ***
 
       // Wavefunction(s) for diagram number 8
-      helas_FFV1_2( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -465,10 +465,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 36 ***
 
       // Wavefunction(s) for diagram number 9
-      helas_FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -482,7 +482,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      helas_VVV1_0( w_fp[5], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[8], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -493,11 +493,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 36 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
-      helas_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -508,10 +508,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 36 ***
 
       // Wavefunction(s) for diagram number 12
-      helas_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[0], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[10], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -525,7 +525,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -536,10 +536,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 14 OF 36 ***
 
       // Wavefunction(s) for diagram number 14
-      helas_FFV1_2( w_fp[11], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_2( w_fp[11], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -553,7 +553,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 15
-      helas_VVV1_0( w_fp[5], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -564,11 +564,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 16 OF 36 ***
 
       // Wavefunction(s) for diagram number 16
-      helas_FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      helas_FFV1P0_3( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -582,7 +582,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 17
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -596,7 +596,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      helas_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -607,10 +607,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 19 OF 36 ***
 
       // Wavefunction(s) for diagram number 19
-      helas_FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 19
-      helas_FFV1_0( w_fp[0], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[11], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -624,7 +624,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      helas_VVV1_0( w_fp[5], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -635,11 +635,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 36 ***
 
       // Wavefunction(s) for diagram number 21
-      helas_FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
-      helas_FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 21
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -653,7 +653,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -667,7 +667,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 23
-      helas_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -678,10 +678,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 24 OF 36 ***
 
       // Wavefunction(s) for diagram number 24
-      helas_FFV1_2( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_2( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 24
-      helas_FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -695,7 +695,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      helas_VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -706,10 +706,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 26 OF 36 ***
 
       // Wavefunction(s) for diagram number 26
-      helas_FFV1_1( w_fp[13], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
+      helas_CD_FFV1_1( w_fp[13], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] );
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -720,10 +720,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 36 ***
 
       // Wavefunction(s) for diagram number 27
-      helas_VVV1P0_1( w_fp[4], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] );
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -734,10 +734,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 28 OF 36 ***
 
       // Wavefunction(s) for diagram number 28
-      helas_FFV1_2( w_fp[6], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_2( w_fp[6], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 28
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -751,7 +751,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -762,10 +762,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 30 OF 36 ***
 
       // Wavefunction(s) for diagram number 30
-      helas_FFV1_1( w_fp[10], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_1( w_fp[10], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 30
-      helas_FFV1_0( w_fp[0], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -776,10 +776,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 36 ***
 
       // Wavefunction(s) for diagram number 31
-      helas_VVV1P0_1( w_fp[4], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_VVV1P0_1( w_fp[4], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 31
-      helas_FFV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -790,10 +790,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 36 ***
 
       // Wavefunction(s) for diagram number 32
-      helas_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -807,7 +807,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -821,17 +821,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      helas_VVVV1_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[3] += 1. / 2. * amp_sv[0];
       jamp_sv[4] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
-      helas_VVVV3_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV3_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[9] += 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      helas_VVVV4_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV4_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= 1. / 2. * amp_sv[0];
       jamp_sv[4] -= 1. / 2. * amp_sv[0];
       jamp_sv[9] += 1. / 2. * amp_sv[0];
@@ -843,7 +843,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_VVV1_0( w_fp[5], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[8], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -859,7 +859,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      helas_VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc
index e6ac298d60..edd5ccaf2c 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc
@@ -339,12 +339,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      helas_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -357,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 14 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -373,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 14 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -387,10 +387,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 14 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[9], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -403,10 +403,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 14 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -419,12 +419,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 14 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      helas_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -437,10 +437,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 14 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -456,7 +456,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -467,10 +467,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 14 ***
 
       // Wavefunction(s) for diagram number 9
-      helas_FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -483,10 +483,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 14 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_FFV1_1( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -499,10 +499,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 14 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -515,10 +515,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 14 ***
 
       // Wavefunction(s) for diagram number 12
-      helas_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[10], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[4], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -531,10 +531,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 14 ***
 
       // Wavefunction(s) for diagram number 13
-      helas_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[6], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -550,7 +550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc
index 70e56ccbdf..8db2d41c78 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc
@@ -341,12 +341,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      helas_FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -359,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 7 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -375,10 +375,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 7 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -389,10 +389,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 7 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -405,10 +405,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 7 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -421,10 +421,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 7 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[3], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -437,10 +437,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 7 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[3], w_fp[0], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[0], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc
index ff48e18cda..6828609429 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc
@@ -339,12 +339,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      helas_FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -357,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 14 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -373,10 +373,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 14 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -387,10 +387,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 14 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[9], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -403,10 +403,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 14 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -419,12 +419,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 14 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] );
-      helas_FFV1P0_3( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 6
-      helas_FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -437,10 +437,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 14 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -456,7 +456,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -467,10 +467,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 14 ***
 
       // Wavefunction(s) for diagram number 9
-      helas_FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[10], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -483,10 +483,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 14 ***
 
       // Wavefunction(s) for diagram number 10
-      helas_FFV1_1( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -499,10 +499,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 14 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_2( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -515,10 +515,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 14 ***
 
       // Wavefunction(s) for diagram number 12
-      helas_FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[10], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[1], w_fp[6], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -531,10 +531,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 14 ***
 
       // Wavefunction(s) for diagram number 13
-      helas_FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[6], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[0], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -550,7 +550,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[10], w_fp[0], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[0], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h
index 24e8114e3a..624de4a7b3 100644
--- a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h
+++ b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h
@@ -1404,178 +1404,347 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1_0 VVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1P0_1 VVVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3_0 VVVV3_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV3P0_1 VVVV3P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4_0 VVVV4_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV4P0_1 VVVV4P0_1<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1_0 linker_VVV1_0
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVVV1_0 linker_VVVV1_0
-#define helas_VVVV1P0_1 linker_VVVV1P0_1
-#define helas_VVVV3_0 linker_VVVV3_0
-#define helas_VVVV3P0_1 linker_VVVV3P0_1
-#define helas_VVVV4_0 linker_VVVV4_0
-#define helas_VVVV4P0_1 linker_VVVV4P0_1
+#define helas_CD_VVV1_0 linker_CD_VVV1_0
+#define helas_CI_VVV1_0 linker_CI_VVV1_0
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVVV1_0 linker_CD_VVVV1_0
+#define helas_CI_VVVV1_0 linker_CI_VVVV1_0
+#define helas_CD_VVVV1P0_1 linker_CD_VVVV1P0_1
+#define helas_CI_VVVV1P0_1 linker_CI_VVVV1P0_1
+#define helas_CD_VVVV3_0 linker_CD_VVVV3_0
+#define helas_CI_VVVV3_0 linker_CI_VVVV3_0
+#define helas_CD_VVVV3P0_1 linker_CD_VVVV3P0_1
+#define helas_CI_VVVV3P0_1 linker_CI_VVVV3P0_1
+#define helas_CD_VVVV4_0 linker_CD_VVVV4_0
+#define helas_CI_VVVV4_0 linker_CI_VVVV4_0
+#define helas_CD_VVVV4P0_1 linker_CD_VVVV4P0_1
+#define helas_CI_VVVV4P0_1 linker_CI_VVVV4P0_1
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVV1_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV1P0_1( const fptype allV2[],
+  linker_CI_FFV1_1( const fptype allF2[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
                     const fptype M1,
                     const fptype W1,
-                    fptype allV1[] );
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV3_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV3P0_1( const fptype allV2[],
+  linker_CI_FFV1_2( const fptype allF1[],
                     const fptype allV3[],
-                    const fptype allV4[],
                     const fptype allCOUP[],
                     const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_VVVV4_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
   __device__ void
-  linker_VVVV4P0_1( const fptype allV2[],
-                    const fptype allV3[],
-                    const fptype allV4[],
-                    const fptype allCOUP[],
-                    const double Ccoeff,
-                    const fptype M1,
-                    const fptype W1,
-                    fptype allV1[] );
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV3P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV4P0_1( const fptype allV2[],
+                       const fptype allV3[],
+                       const fptype allV4[],
+                       const fptype allCOUP[],
+                       const double Ccoeff,
+                       const fptype M1,
+                       const fptype W1,
+                       fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
index 1d163b2ce7..7d5ba7f16f 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
@@ -77,7 +77,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.14316701889038086 [0m
+[1;32mDEBUG: model prefixing  takes 0.14253568649291992 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -114,8 +114,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ t t~ @1 
 INFO: Creating files in directory P1_gg_ttxttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f99ad249100> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fee1df5d100> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -131,25 +131,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses/P1_gg_ttxttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s
-Wrote files for 119 helas calls in 0.443 s
+Wrote files for 119 helas calls in 0.445 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.327 s
+ALOHA: aloha creates 5 routines in  0.331 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 10 routines in  0.341 s
+ALOHA: aloha creates 10 routines in  0.344 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -193,8 +193,8 @@ Run "open index.html" to see more information about this process.
 quit
 
 real	0m7.445s
-user	0m7.100s
-sys	0m0.303s
+user	0m7.114s
+sys	0m0.296s
 Code generation completed in 7 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/HelAmps.cc
index b1e8df624a..a5fef21de4 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/HelAmps.cc
@@ -62,137 +62,270 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV5_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV5_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV5P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVV5_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return VVV5_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVV5P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
     return VVV5P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_VVV5P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV5P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
   {
     return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
   __device__ void
-  linker_VVVV9_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV9_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV9_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV9_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
   __device__ void
-  linker_VVVV10_0( const fptype allV1[],
-                   const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allV4[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   fptype allvertexes[] )
+  linker_CD_VVVV10_0( const fptype allV1[],
+                      const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allV4[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      fptype allvertexes[] )
   {
     return VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV10_0( const fptype allV1[],
+                      const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allV4[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      fptype allvertexes[] )
+  {
+    return VVVV10_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc
index 3a68f91378..c2a07a90d7 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc
@@ -336,12 +336,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][5], -1, w_fp[5], 5 );
 
-      helas_VVV5P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_VVV5P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -354,10 +354,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 72 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -370,10 +370,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 72 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV5_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -386,11 +386,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 72 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1P0_3( w_fp[5], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[11] );
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -406,7 +406,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -419,10 +419,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 72 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1P0_3( w_fp[3], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 6
-      helas_VVV5_0( w_fp[6], w_fp[11], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[6], w_fp[11], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -435,10 +435,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 72 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[5], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -454,7 +454,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -470,7 +470,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -486,7 +486,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -499,12 +499,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 72 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
-      helas_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -518,7 +518,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -529,11 +529,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 72 ***
 
       // Wavefunction(s) for diagram number 13
-      helas_FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -547,7 +547,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -558,10 +558,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 15 OF 72 ***
 
       // Wavefunction(s) for diagram number 15
-      helas_FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 15
-      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -575,7 +575,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -586,10 +586,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 72 ***
 
       // Wavefunction(s) for diagram number 17
-      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 17
-      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -603,7 +603,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -617,7 +617,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 19
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -631,7 +631,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[10], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -642,12 +642,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 72 ***
 
       // Wavefunction(s) for diagram number 21
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 21
-      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -661,7 +661,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_FFV1_0( w_fp[14], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -672,10 +672,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 23 OF 72 ***
 
       // Wavefunction(s) for diagram number 23
-      helas_FFV1P0_3( w_fp[14], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_CD_FFV1P0_3( w_fp[14], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 23
-      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[6], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -689,7 +689,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 24
-      helas_FFV1_0( w_fp[14], w_fp[6], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[6], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -703,7 +703,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[4], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -717,7 +717,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -728,10 +728,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 72 ***
 
       // Wavefunction(s) for diagram number 27
-      helas_FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_CD_FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[17], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[17], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -745,7 +745,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 28
-      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[11], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -759,7 +759,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[17], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[17], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -773,7 +773,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[10], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -784,11 +784,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 72 ***
 
       // Wavefunction(s) for diagram number 31
-      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 31
-      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[16], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -799,10 +799,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 72 ***
 
       // Wavefunction(s) for diagram number 32
-      helas_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -816,7 +816,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -830,7 +830,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[12], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -844,7 +844,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -858,7 +858,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[12], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -869,10 +869,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 37 OF 72 ***
 
       // Wavefunction(s) for diagram number 37
-      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 37
-      helas_FFV1_0( w_fp[5], w_fp[14], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[14], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -886,7 +886,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -900,7 +900,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -914,7 +914,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[11], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -925,11 +925,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 41 OF 72 ***
 
       // Wavefunction(s) for diagram number 41
-      helas_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
-      helas_FFV1P0_3( w_fp[17], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_CD_FFV1P0_3( w_fp[17], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 41
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -943,7 +943,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 42
-      helas_FFV1_0( w_fp[17], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[17], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -954,10 +954,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 43 OF 72 ***
 
       // Wavefunction(s) for diagram number 43
-      helas_FFV1P0_3( w_fp[17], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[17], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 43
-      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -971,7 +971,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 44
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -985,7 +985,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -999,7 +999,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      helas_FFV1_0( w_fp[17], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[17], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1010,10 +1010,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 47 OF 72 ***
 
       // Wavefunction(s) for diagram number 47
-      helas_FFV1_2( w_fp[17], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[17], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 47
-      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1027,7 +1027,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 48 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1041,7 +1041,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 49
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1055,7 +1055,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 50
-      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1066,10 +1066,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 51 OF 72 ***
 
       // Wavefunction(s) for diagram number 51
-      helas_FFV1_1( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 51
-      helas_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1080,10 +1080,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 52 OF 72 ***
 
       // Wavefunction(s) for diagram number 52
-      helas_VVV5P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_CD_VVV5P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 52
-      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[16], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1097,7 +1097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1108,10 +1108,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 54 OF 72 ***
 
       // Wavefunction(s) for diagram number 54
-      helas_VVV5P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVV5P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 54
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1122,10 +1122,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 55 OF 72 ***
 
       // Wavefunction(s) for diagram number 55
-      helas_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 55
-      helas_FFV1_0( w_fp[16], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1136,10 +1136,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 56 OF 72 ***
 
       // Wavefunction(s) for diagram number 56
-      helas_VVV5P0_1( w_fp[0], w_fp[11], COUPs[0], 1.0, 0., 0., w_fp[14] );
+      helas_CD_VVV5P0_1( w_fp[0], w_fp[11], COUPs[0], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 56
-      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1153,7 +1153,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1167,7 +1167,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 58
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 58 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1178,10 +1178,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 72 ***
 
       // Wavefunction(s) for diagram number 59
-      helas_FFV1_1( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_1( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 59
-      helas_FFV1_0( w_fp[5], w_fp[13], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[13], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1192,10 +1192,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 60 OF 72 ***
 
       // Wavefunction(s) for diagram number 60
-      helas_VVV5P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVV5P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 60
-      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[6], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1209,7 +1209,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1223,7 +1223,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1234,10 +1234,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 63 OF 72 ***
 
       // Wavefunction(s) for diagram number 63
-      helas_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_CD_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 63
-      helas_FFV1_0( w_fp[6], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1251,7 +1251,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[4], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1265,7 +1265,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 65
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1279,7 +1279,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 66
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1293,17 +1293,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += 1. / 2. * amp_sv[0];
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[9] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      helas_VVVV9_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV9_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[1] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[6] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      helas_VVVV10_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV10_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[2] += 1. / 2. * amp_sv[0];
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[6] -= 1. / 2. * amp_sv[0];
@@ -1315,7 +1315,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 68
-      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[10], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1331,7 +1331,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1347,17 +1347,17 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= 1. / 2. * amp_sv[0];
       jamp_sv[3] += 1. / 2. * amp_sv[0];
       jamp_sv[8] += 1. / 2. * amp_sv[0];
       jamp_sv[11] -= 1. / 2. * amp_sv[0];
-      helas_VVVV9_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV9_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[0] -= 1. / 2. * amp_sv[0];
       jamp_sv[4] += 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
       jamp_sv[11] -= 1. / 2. * amp_sv[0];
-      helas_VVVV10_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV10_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
       jamp_sv[3] -= 1. / 2. * amp_sv[0];
       jamp_sv[4] += 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
@@ -1369,7 +1369,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[8], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -1385,7 +1385,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[11], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.pdf b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.pdf
index 39cdc07f21816faade12ca39a135028897eddf68..421aa3be878f13363e59bdf035ffc4d7f7ee59de 100644
GIT binary patch
delta 41
tcmcciN$lDuv4$4L7N!>F7M3ln-*zx+Oqblrs=%ZPp&2!&TkT|(0RUR$4l4iv

delta 41
tcmcciN$lDuv4$4L7N!>F7M3ln-*zzCOqblrs=#Cip&9L_TkT|(0RUex4t)Rs

diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
index 69e1c0cf7b..d664b36c32 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -1257,133 +1257,257 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV5_0 VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVV5P0_1 VVV5P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV9_0 VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV10_0 VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CD_VVV5_0 VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV5_0 VVV5_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVV5P0_1 VVV5P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV5P0_1 VVV5P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV9_0 VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV9_0 VVVV9_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV10_0 VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV10_0 VVVV10_0<W_ACCESS, A_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV5_0 linker_VVV5_0
-#define helas_VVV5P0_1 linker_VVV5P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVVV1_0 linker_VVVV1_0
-#define helas_VVVV9_0 linker_VVVV9_0
-#define helas_VVVV10_0 linker_VVVV10_0
+#define helas_CD_VVV5_0 linker_CD_VVV5_0
+#define helas_CI_VVV5_0 linker_CI_VVV5_0
+#define helas_CD_VVV5P0_1 linker_CD_VVV5P0_1
+#define helas_CI_VVV5P0_1 linker_CI_VVV5P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVVV1_0 linker_CD_VVVV1_0
+#define helas_CI_VVVV1_0 linker_CI_VVVV1_0
+#define helas_CD_VVVV9_0 linker_CD_VVVV9_0
+#define helas_CI_VVVV9_0 linker_CI_VVVV9_0
+#define helas_CD_VVVV10_0 linker_CD_VVVV10_0
+#define helas_CI_VVVV10_0 linker_CI_VVVV10_0
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV5_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV5_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV5P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVV5_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV5P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV5P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV9_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV9_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV9_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV10_0( const fptype allV1[],
+                      const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allV4[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
   __device__ void
-  linker_VVVV10_0( const fptype allV1[],
-                   const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allV4[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   fptype allvertexes[] );
+  linker_CI_VVVV10_0( const fptype allV1[],
+                      const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allV4[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
index 7f7526e856..0b61fdbda6 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
@@ -77,7 +77,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.14350485801696777 [0m
+[1;32mDEBUG: model prefixing  takes 0.14379262924194336 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.857 s
+1 processes with 72 diagrams generated in 3.829 s
 Total: 1 processes with 72 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -115,14 +115,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. 
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.198 s
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.329 s
+ALOHA: aloha creates 5 routines in  0.331 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -144,7 +144,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
 quit
 
-real	0m5.460s
-user	0m5.214s
-sys	0m0.079s
+real	0m5.290s
+user	0m5.191s
+sys	0m0.065s
 Code generation completed in 6 seconds
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/HelAmps.cc
index b1e8df624a..a5fef21de4 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/HelAmps.cc
@@ -62,137 +62,270 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV5_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_VVV5_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV5P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CI_VVV5_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return VVV5_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVV5P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
     return VVV5P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_VVV5P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV5P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] )
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
   {
     return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
   __device__ void
-  linker_VVVV9_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV9_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV9_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVVV9_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
   __device__ void
-  linker_VVVV10_0( const fptype allV1[],
-                   const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allV4[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   fptype allvertexes[] )
+  linker_CD_VVVV10_0( const fptype allV1[],
+                      const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allV4[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      fptype allvertexes[] )
   {
     return VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV10_0( const fptype allV1[],
+                      const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allV4[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      fptype allvertexes[] )
+  {
+    return VVVV10_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allV3, allV4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc
index 2946a93055..aabaa8a0cc 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc
@@ -336,12 +336,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][5], -1, w_fp[5], 5 );
 
-      helas_VVV5P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
-      helas_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
+      helas_CD_VVV5P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -353,10 +353,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 72 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -368,10 +368,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 72 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 3
-      helas_VVV5_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -383,11 +383,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 72 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_FFV1P0_3( w_fp[5], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[11] );
-      helas_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[11] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 4
-      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -402,7 +402,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 5
-      helas_FFV1_0( w_fp[3], w_fp[8], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[8], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -414,10 +414,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 72 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_FFV1P0_3( w_fp[3], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 6
-      helas_VVV5_0( w_fp[6], w_fp[11], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[6], w_fp[11], w_fp[8], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -429,10 +429,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 72 ***
 
       // Wavefunction(s) for diagram number 7
-      helas_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 7
-      helas_FFV1_0( w_fp[5], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[13], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -447,7 +447,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 8
-      helas_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -462,7 +462,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 9
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -477,7 +477,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 10
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -489,12 +489,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 11 OF 72 ***
 
       // Wavefunction(s) for diagram number 11
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      helas_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
-      helas_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 11
-      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -507,7 +507,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 12
-      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[12], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -517,11 +517,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 13 OF 72 ***
 
       // Wavefunction(s) for diagram number 13
-      helas_FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
-      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 13
-      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -534,7 +534,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 14
-      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -544,10 +544,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 15 OF 72 ***
 
       // Wavefunction(s) for diagram number 15
-      helas_FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] );
 
       // Amplitude(s) for diagram number 15
-      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -560,7 +560,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 16
-      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[12], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -570,10 +570,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 17 OF 72 ***
 
       // Wavefunction(s) for diagram number 17
-      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 17
-      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -586,7 +586,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 18
-      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -599,7 +599,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 19
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -612,7 +612,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 20
-      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[10], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -622,12 +622,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 21 OF 72 ***
 
       // Wavefunction(s) for diagram number 21
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
-      helas_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
-      helas_FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 21
-      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -640,7 +640,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 22
-      helas_FFV1_0( w_fp[14], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[16], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -650,10 +650,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 23 OF 72 ***
 
       // Wavefunction(s) for diagram number 23
-      helas_FFV1P0_3( w_fp[14], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[12] );
+      helas_CD_FFV1P0_3( w_fp[14], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 23
-      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[6], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -666,7 +666,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 24
-      helas_FFV1_0( w_fp[14], w_fp[6], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[14], w_fp[6], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -679,7 +679,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 25
-      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[4], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -692,7 +692,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 26
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -702,10 +702,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 27 OF 72 ***
 
       // Wavefunction(s) for diagram number 27
-      helas_FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_CD_FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
 
       // Amplitude(s) for diagram number 27
-      helas_FFV1_0( w_fp[17], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[17], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -718,7 +718,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 28
-      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[11], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -731,7 +731,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 29
-      helas_FFV1_0( w_fp[17], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[17], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -744,7 +744,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 30
-      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[10], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -754,11 +754,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 31 OF 72 ***
 
       // Wavefunction(s) for diagram number 31
-      helas_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
-      helas_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
+      helas_CD_FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] );
 
       // Amplitude(s) for diagram number 31
-      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[16], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -768,10 +768,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 32 OF 72 ***
 
       // Wavefunction(s) for diagram number 32
-      helas_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 32
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -784,7 +784,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 33
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -797,7 +797,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 34
-      helas_FFV1_0( w_fp[13], w_fp[12], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[12], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -810,7 +810,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 35
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[17], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -823,7 +823,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 36
-      helas_FFV1_0( w_fp[15], w_fp[12], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[12], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -833,10 +833,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 37 OF 72 ***
 
       // Wavefunction(s) for diagram number 37
-      helas_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
+      helas_CD_FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] );
 
       // Amplitude(s) for diagram number 37
-      helas_FFV1_0( w_fp[5], w_fp[14], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[14], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -849,7 +849,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 38
-      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -862,7 +862,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 39
-      helas_FFV1_0( w_fp[3], w_fp[14], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[14], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -875,7 +875,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 40
-      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[11], w_fp[17], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -885,11 +885,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 41 OF 72 ***
 
       // Wavefunction(s) for diagram number 41
-      helas_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
-      helas_FFV1P0_3( w_fp[17], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] );
+      helas_CD_FFV1P0_3( w_fp[17], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 41
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -902,7 +902,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 42
-      helas_FFV1_0( w_fp[17], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[17], w_fp[16], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -912,10 +912,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 43 OF 72 ***
 
       // Wavefunction(s) for diagram number 43
-      helas_FFV1P0_3( w_fp[17], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[17], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 43
-      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -928,7 +928,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 44
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -941,7 +941,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 45
-      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -954,7 +954,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 46
-      helas_FFV1_0( w_fp[17], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[17], w_fp[6], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -964,10 +964,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 47 OF 72 ***
 
       // Wavefunction(s) for diagram number 47
-      helas_FFV1_2( w_fp[17], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
+      helas_CD_FFV1_2( w_fp[17], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] );
 
       // Amplitude(s) for diagram number 47
-      helas_FFV1_0( w_fp[12], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -980,7 +980,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 48
-      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -993,7 +993,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 49
-      helas_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1006,7 +1006,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 50
-      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1016,10 +1016,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 51 OF 72 ***
 
       // Wavefunction(s) for diagram number 51
-      helas_FFV1_1( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
+      helas_CD_FFV1_1( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] );
 
       // Amplitude(s) for diagram number 51
-      helas_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1029,10 +1029,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 52 OF 72 ***
 
       // Wavefunction(s) for diagram number 52
-      helas_VVV5P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[12] );
+      helas_CD_VVV5P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[12] );
 
       // Amplitude(s) for diagram number 52
-      helas_FFV1_0( w_fp[5], w_fp[16], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[16], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1045,7 +1045,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 53
-      helas_FFV1_0( w_fp[3], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[9], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1055,10 +1055,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 54 OF 72 ***
 
       // Wavefunction(s) for diagram number 54
-      helas_VVV5P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_VVV5P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[9] );
 
       // Amplitude(s) for diagram number 54
-      helas_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1068,10 +1068,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 55 OF 72 ***
 
       // Wavefunction(s) for diagram number 55
-      helas_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
+      helas_CD_FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] );
 
       // Amplitude(s) for diagram number 55
-      helas_FFV1_0( w_fp[16], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[4], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1081,10 +1081,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 56 OF 72 ***
 
       // Wavefunction(s) for diagram number 56
-      helas_VVV5P0_1( w_fp[0], w_fp[11], COUPs[0], 1.0, 0., 0., w_fp[14] );
+      helas_CD_VVV5P0_1( w_fp[0], w_fp[11], COUPs[0], 1.0, 0., 0., w_fp[14] );
 
       // Amplitude(s) for diagram number 56
-      helas_FFV1_0( w_fp[13], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[4], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1097,7 +1097,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 57
-      helas_FFV1_0( w_fp[16], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[16], w_fp[2], w_fp[10], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1110,7 +1110,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 58
-      helas_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1120,10 +1120,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 59 OF 72 ***
 
       // Wavefunction(s) for diagram number 59
-      helas_FFV1_1( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
+      helas_CD_FFV1_1( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] );
 
       // Amplitude(s) for diagram number 59
-      helas_FFV1_0( w_fp[5], w_fp[13], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[13], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1133,10 +1133,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 60 OF 72 ***
 
       // Wavefunction(s) for diagram number 60
-      helas_VVV5P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[16] );
+      helas_CD_VVV5P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[16] );
 
       // Amplitude(s) for diagram number 60
-      helas_FFV1_0( w_fp[5], w_fp[6], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[6], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1149,7 +1149,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 61
-      helas_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1162,7 +1162,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 62
-      helas_FFV1_0( w_fp[3], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[6], w_fp[14], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1172,10 +1172,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 63 OF 72 ***
 
       // Wavefunction(s) for diagram number 63
-      helas_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
+      helas_CD_FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] );
 
       // Amplitude(s) for diagram number 63
-      helas_FFV1_0( w_fp[6], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[4], w_fp[7], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1188,7 +1188,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 64
-      helas_FFV1_0( w_fp[15], w_fp[4], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[4], w_fp[16], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1201,7 +1201,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 65
-      helas_FFV1_0( w_fp[6], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[2], w_fp[8], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1214,7 +1214,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 66
-      helas_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1227,7 +1227,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 67
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1235,7 +1235,7 @@ namespace mg5amcCpu
       jamp_sv[2] -= 1. / 2. * amp_sv[0];
       jamp_sv[9] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      helas_VVVV9_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV9_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1243,7 +1243,7 @@ namespace mg5amcCpu
       jamp_sv[5] -= 1. / 2. * amp_sv[0];
       jamp_sv[6] -= 1. / 2. * amp_sv[0];
       jamp_sv[10] += 1. / 2. * amp_sv[0];
-      helas_VVVV10_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV10_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1258,7 +1258,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 68
-      helas_VVV5_0( w_fp[1], w_fp[10], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[10], w_fp[16], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1273,7 +1273,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 69
-      helas_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1288,7 +1288,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 70
-      helas_VVVV1_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV1_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1296,7 +1296,7 @@ namespace mg5amcCpu
       jamp_sv[3] += 1. / 2. * amp_sv[0];
       jamp_sv[8] += 1. / 2. * amp_sv[0];
       jamp_sv[11] -= 1. / 2. * amp_sv[0];
-      helas_VVVV9_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV9_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1304,7 +1304,7 @@ namespace mg5amcCpu
       jamp_sv[4] += 1. / 2. * amp_sv[0];
       jamp_sv[7] += 1. / 2. * amp_sv[0];
       jamp_sv[11] -= 1. / 2. * amp_sv[0];
-      helas_VVVV10_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VVVV10_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1319,7 +1319,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 71
-      helas_VVV5_0( w_fp[1], w_fp[8], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[8], w_fp[14], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -1334,7 +1334,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 72
-      helas_VVV5_0( w_fp[1], w_fp[11], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVV5_0( w_fp[1], w_fp[11], w_fp[12], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
index 69e1c0cf7b..d664b36c32 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -1257,133 +1257,257 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV5_0 VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVV5P0_1 VVV5P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
-#define helas_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
-#define helas_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV9_0 VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VVVV10_0 VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CD_VVV5_0 VVV5_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVV5_0 VVV5_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVV5P0_1 VVV5P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV5P0_1 VVV5P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV1_0 VVVV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV9_0 VVVV9_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV9_0 VVVV9_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VVVV10_0 VVVV10_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVVV10_0 VVVV10_0<W_ACCESS, A_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV5_0 linker_VVV5_0
-#define helas_VVV5P0_1 linker_VVV5P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
-#define helas_FFV1P0_3 linker_FFV1P0_3
-#define helas_VVVV1_0 linker_VVVV1_0
-#define helas_VVVV9_0 linker_VVVV9_0
-#define helas_VVVV10_0 linker_VVVV10_0
+#define helas_CD_VVV5_0 linker_CD_VVV5_0
+#define helas_CI_VVV5_0 linker_CI_VVV5_0
+#define helas_CD_VVV5P0_1 linker_CD_VVV5P0_1
+#define helas_CI_VVV5P0_1 linker_CI_VVV5P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_VVVV1_0 linker_CD_VVVV1_0
+#define helas_CI_VVVV1_0 linker_CI_VVVV1_0
+#define helas_CD_VVVV9_0 linker_CD_VVVV9_0
+#define helas_CI_VVVV9_0 linker_CI_VVVV9_0
+#define helas_CD_VVVV10_0 linker_CD_VVVV10_0
+#define helas_CI_VVVV10_0 linker_CI_VVVV10_0
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV5_0( const fptype allV1[],
-                 const fptype allV2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV5_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVV5P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CI_VVV5_0( const fptype allV1[],
+                    const fptype allV2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CD_VVV5P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CI_VVV5P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1P0_3( const fptype allF1[],
-                   const fptype allF2[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M3,
-                   const fptype W3,
-                   fptype allV3[] );
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVVV1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VVVV9_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allV3[],
-                  const fptype allV4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6]
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV9_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVVV9_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allV3[],
+                     const fptype allV4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVVV10_0( const fptype allV1[],
+                      const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allV4[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] (independent couplings)
   __device__ void
-  linker_VVVV10_0( const fptype allV1[],
-                   const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allV4[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   fptype allvertexes[] );
+  linker_CI_VVVV10_0( const fptype allV1[],
+                      const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allV4[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
index c26505d3a3..e4b5a44402 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.128 s
+1 processes with 6 diagrams generated in 0.127 s
 Total: 1 processes with 6 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t1 t1~ @1 
 INFO: Creating files in directory P1_gg_t1t1x 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7ff7c7593a90> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fcd0c7c1a90> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -593,21 +593,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s
 Wrote files for 16 helas calls in 0.128 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.190 s
+ALOHA: aloha creates 3 routines in  0.191 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 6 routines in  0.187 s
+ALOHA: aloha creates 6 routines in  0.188 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
@@ -646,9 +646,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.084s
-user	0m2.771s
-sys	0m0.313s
+real	0m3.094s
+user	0m2.788s
+sys	0m0.304s
 Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/HelAmps.cc
index b984c35d2f..625bce6981 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/HelAmps.cc
@@ -62,78 +62,152 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
     return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VSS1_0( const fptype allV1[],
-                 const fptype allS2[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VSS1_0( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allS2, allS3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6] (independent couplings)
   __device__ void
-  linker_VSS1_2( const fptype allV1[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allS2[] )
+  linker_CI_VSS1_0( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return VSS1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allS2, allS3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VSS1_2( const fptype allV1[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allS2[] )
   {
     return VSS1_2<W_ACCESS, CD_ACCESS>( allV1, allS3, allCOUP, Ccoeff, M2, W2, allS2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6]
+  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6] (independent couplings)
+  __device__ void
+  linker_CI_VSS1_2( const fptype allV1[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allS2[] )
+  {
+    return VSS1_2<W_ACCESS, CI_ACCESS>( allV1, allS3, allCOUP, Ccoeff, M2, W2, allS2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6] (dependent couplings)
   __device__ void
-  linker_VSS1_3( const fptype allV1[],
-                 const fptype allS2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allS3[] )
+  linker_CD_VSS1_3( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] )
   {
     return VSS1_3<W_ACCESS, CD_ACCESS>( allV1, allS2, allCOUP, Ccoeff, M3, W3, allS3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6]
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6] (independent couplings)
   __device__ void
-  linker_VVSS1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allS3[],
-                  const fptype allS4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CI_VSS1_3( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] )
+  {
+    return VSS1_3<W_ACCESS, CI_ACCESS>( allV1, allS2, allCOUP, Ccoeff, M3, W3, allS3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVSS1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allS3[],
+                     const fptype allS4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allS3, allS4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVSS1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allS3[],
+                     const fptype allS4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVSS1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allS3, allS4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc
index 5185b0d399..d61f773789 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc
@@ -333,18 +333,18 @@ namespace mg5amcCpu
       sxxxxx<M_ACCESS, W_ACCESS>( momenta, +1, w_fp[3], 3 );
 
       // Amplitude(s) for diagram number 1
-      helas_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[1] += amp_sv[0];
-      helas_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
       jamp_sv[0] += amp_sv[0];
 
       // *** DIAGRAM 2 OF 6 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_VSS1_0( w_fp[4], w_fp[3], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VSS1_0( w_fp[4], w_fp[3], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -355,10 +355,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 6 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_VSS1_2( w_fp[0], w_fp[2], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_VSS1_2( w_fp[0], w_fp[2], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      helas_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -368,10 +368,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 6 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_VSS1_3( w_fp[0], w_fp[2], COUPs[3], -1.0, cIPD[2], cIPD[3], w_fp[4] );
+      helas_CD_VSS1_3( w_fp[0], w_fp[2], COUPs[3], -1.0, cIPD[2], cIPD[3], w_fp[4] );
 
       // Amplitude(s) for diagram number 4
-      helas_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[3], 1.0, &amp_fp[0] );
+      helas_CD_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[3], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -381,10 +381,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 6 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_VSS1_3( w_fp[0], w_fp[3], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_VSS1_3( w_fp[0], w_fp[3], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 5
-      helas_VSS1_0( w_fp[1], w_fp[4], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VSS1_0( w_fp[1], w_fp[4], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -394,10 +394,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 6 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_VSS1_3( w_fp[0], w_fp[3], COUPs[3], 1.0, cIPD[2], cIPD[3], w_fp[4] );
+      helas_CD_VSS1_3( w_fp[0], w_fp[3], COUPs[3], 1.0, cIPD[2], cIPD[3], w_fp[4] );
 
       // Amplitude(s) for diagram number 6
-      helas_VSS1_0( w_fp[1], w_fp[2], w_fp[4], COUPs[3], -1.0, &amp_fp[0] );
+      helas_CD_VSS1_0( w_fp[1], w_fp[2], w_fp[4], COUPs[3], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h
index ed348cf878..afc3bad07e 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h
@@ -1077,78 +1077,147 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VSS1_0 VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VSS1_2 VSS1_2<W_ACCESS, CD_ACCESS>
-#define helas_VSS1_3 VSS1_3<W_ACCESS, CD_ACCESS>
-#define helas_VVSS1_0 VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VSS1_0 VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VSS1_0 VSS1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VSS1_2 VSS1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_VSS1_2 VSS1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_VSS1_3 VSS1_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_VSS1_3 VSS1_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVSS1_0 VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVSS1_0 VVSS1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_VSS1_0 linker_VSS1_0
-#define helas_VSS1_2 linker_VSS1_2
-#define helas_VSS1_3 linker_VSS1_3
-#define helas_VVSS1_0 linker_VVSS1_0
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_VSS1_0 linker_CD_VSS1_0
+#define helas_CI_VSS1_0 linker_CI_VSS1_0
+#define helas_CD_VSS1_2 linker_CD_VSS1_2
+#define helas_CI_VSS1_2 linker_CI_VSS1_2
+#define helas_CD_VSS1_3 linker_CD_VSS1_3
+#define helas_CI_VSS1_3 linker_CI_VSS1_3
+#define helas_CD_VVSS1_0 linker_CD_VVSS1_0
+#define helas_CI_VVSS1_0 linker_CI_VVSS1_0
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VSS1_0( const fptype allV1[],
-                 const fptype allS2[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6] (dependent couplings)
   __device__ void
-  linker_VSS1_2( const fptype allV1[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allS2[] );
+  linker_CD_VSS1_0( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6] (independent couplings)
   __device__ void
-  linker_VSS1_3( const fptype allV1[],
-                 const fptype allS2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allS3[] );
+  linker_CI_VSS1_0( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6]
+  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VSS1_2( const fptype allV1[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allS2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6] (independent couplings)
+  __device__ void
+  linker_CI_VSS1_2( const fptype allV1[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allS2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6] (dependent couplings)
+  __device__ void
+  linker_CD_VSS1_3( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6] (independent couplings)
+  __device__ void
+  linker_CI_VSS1_3( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVSS1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allS3[],
+                     const fptype allS4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6] (independent couplings)
   __device__ void
-  linker_VVSS1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allS3[],
-                  const fptype allS4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CI_VVSS1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allS3[],
+                     const fptype allS4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
index 1d876e6d5b..c00973accb 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.129 s
+1 processes with 6 diagrams generated in 0.128 s
 Total: 1 processes with 6 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1
 Load PLUGIN.CUDACPP_OUTPUT
@@ -600,7 +600,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
 quit
 
-real	0m1.386s
-user	0m1.297s
-sys	0m0.081s
-Code generation completed in 1 seconds
+real	0m1.376s
+user	0m1.301s
+sys	0m0.068s
+Code generation completed in 2 seconds
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/HelAmps.cc
index b984c35d2f..625bce6981 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/HelAmps.cc
@@ -62,78 +62,152 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
     return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VSS1_0( const fptype allV1[],
-                 const fptype allS2[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VSS1_0( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allS2, allS3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6] (independent couplings)
   __device__ void
-  linker_VSS1_2( const fptype allV1[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allS2[] )
+  linker_CI_VSS1_0( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return VSS1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allS2, allS3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VSS1_2( const fptype allV1[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allS2[] )
   {
     return VSS1_2<W_ACCESS, CD_ACCESS>( allV1, allS3, allCOUP, Ccoeff, M2, W2, allS2 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6]
+  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6] (independent couplings)
+  __device__ void
+  linker_CI_VSS1_2( const fptype allV1[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allS2[] )
+  {
+    return VSS1_2<W_ACCESS, CI_ACCESS>( allV1, allS3, allCOUP, Ccoeff, M2, W2, allS2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6] (dependent couplings)
   __device__ void
-  linker_VSS1_3( const fptype allV1[],
-                 const fptype allS2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allS3[] )
+  linker_CD_VSS1_3( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] )
   {
     return VSS1_3<W_ACCESS, CD_ACCESS>( allV1, allS2, allCOUP, Ccoeff, M3, W3, allS3 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6]
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6] (independent couplings)
   __device__ void
-  linker_VVSS1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allS3[],
-                  const fptype allS4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] )
+  linker_CI_VSS1_3( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] )
+  {
+    return VSS1_3<W_ACCESS, CI_ACCESS>( allV1, allS2, allCOUP, Ccoeff, M3, W3, allS3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVSS1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allS3[],
+                     const fptype allS4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
   {
     return VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allV1, allV2, allS3, allS4, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6] (independent couplings)
+  __device__ void
+  linker_CI_VVSS1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allS3[],
+                     const fptype allS4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] )
+  {
+    return VVSS1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allV1, allV2, allS3, allS4, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc
index 1b8627679f..d48824a28e 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc
@@ -333,12 +333,12 @@ namespace mg5amcCpu
       sxxxxx<M_ACCESS, W_ACCESS>( momenta, +1, w_fp[3], 3 );
 
       // Amplitude(s) for diagram number 1
-      helas_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
       jamp_sv[1] += amp_sv[0];
-      helas_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_VVSS1_0( w_fp[0], w_fp[1], w_fp[3], w_fp[2], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -347,10 +347,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 6 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_VSS1_0( w_fp[4], w_fp[3], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VSS1_0( w_fp[4], w_fp[3], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -360,10 +360,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 6 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_VSS1_2( w_fp[0], w_fp[2], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_VSS1_2( w_fp[0], w_fp[2], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      helas_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -372,10 +372,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 4 OF 6 ***
 
       // Wavefunction(s) for diagram number 4
-      helas_VSS1_3( w_fp[0], w_fp[2], COUPs[3], -1.0, cIPD[2], cIPD[3], w_fp[4] );
+      helas_CD_VSS1_3( w_fp[0], w_fp[2], COUPs[3], -1.0, cIPD[2], cIPD[3], w_fp[4] );
 
       // Amplitude(s) for diagram number 4
-      helas_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[3], 1.0, &amp_fp[0] );
+      helas_CD_VSS1_0( w_fp[1], w_fp[3], w_fp[4], COUPs[3], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -384,10 +384,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 6 ***
 
       // Wavefunction(s) for diagram number 5
-      helas_VSS1_3( w_fp[0], w_fp[3], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_VSS1_3( w_fp[0], w_fp[3], COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 5
-      helas_VSS1_0( w_fp[1], w_fp[4], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
+      helas_CD_VSS1_0( w_fp[1], w_fp[4], w_fp[2], COUPs[2], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -396,10 +396,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 6 OF 6 ***
 
       // Wavefunction(s) for diagram number 6
-      helas_VSS1_3( w_fp[0], w_fp[3], COUPs[3], 1.0, cIPD[2], cIPD[3], w_fp[4] );
+      helas_CD_VSS1_3( w_fp[0], w_fp[3], COUPs[3], 1.0, cIPD[2], cIPD[3], w_fp[4] );
 
       // Amplitude(s) for diagram number 6
-      helas_VSS1_0( w_fp[1], w_fp[2], w_fp[4], COUPs[3], -1.0, &amp_fp[0] );
+      helas_CD_VSS1_0( w_fp[1], w_fp[2], w_fp[4], COUPs[3], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h
index ed348cf878..afc3bad07e 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h
@@ -1077,78 +1077,147 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_VSS1_0 VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_VSS1_2 VSS1_2<W_ACCESS, CD_ACCESS>
-#define helas_VSS1_3 VSS1_3<W_ACCESS, CD_ACCESS>
-#define helas_VVSS1_0 VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_VSS1_0 VSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VSS1_0 VSS1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_VSS1_2 VSS1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_VSS1_2 VSS1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_VSS1_3 VSS1_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_VSS1_3 VSS1_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVSS1_0 VVSS1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_VVSS1_0 VVSS1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_VSS1_0 linker_VSS1_0
-#define helas_VSS1_2 linker_VSS1_2
-#define helas_VSS1_3 linker_VSS1_3
-#define helas_VVSS1_0 linker_VVSS1_0
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_VSS1_0 linker_CD_VSS1_0
+#define helas_CI_VSS1_0 linker_CI_VSS1_0
+#define helas_CD_VSS1_2 linker_CD_VSS1_2
+#define helas_CI_VSS1_2 linker_CI_VSS1_2
+#define helas_CD_VSS1_3 linker_CD_VSS1_3
+#define helas_CI_VSS1_3 linker_CI_VSS1_3
+#define helas_CD_VVSS1_0 linker_CD_VVSS1_0
+#define helas_CI_VVSS1_0 linker_CI_VVSS1_0
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_VSS1_0( const fptype allV1[],
-                 const fptype allS2[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6] (dependent couplings)
   __device__ void
-  linker_VSS1_2( const fptype allV1[],
-                 const fptype allS3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allS2[] );
+  linker_CD_VSS1_0( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], S2[6], S3[6] (independent couplings)
   __device__ void
-  linker_VSS1_3( const fptype allV1[],
-                 const fptype allS2[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M3,
-                 const fptype W3,
-                 fptype allS3[] );
+  linker_CI_VSS1_0( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6]
+  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VSS1_2( const fptype allV1[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allS2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S2[6]' from the input wavefunctions V1[6], S3[6] (independent couplings)
+  __device__ void
+  linker_CI_VSS1_2( const fptype allV1[],
+                    const fptype allS3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allS2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6] (dependent couplings)
+  __device__ void
+  linker_CD_VSS1_3( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'S3[6]' from the input wavefunctions V1[6], S2[6] (independent couplings)
+  __device__ void
+  linker_CI_VSS1_3( const fptype allV1[],
+                    const fptype allS2[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M3,
+                    const fptype W3,
+                    fptype allS3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVSS1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allS3[],
+                     const fptype allS4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], S3[6], S4[6] (independent couplings)
   __device__ void
-  linker_VVSS1_0( const fptype allV1[],
-                  const fptype allV2[],
-                  const fptype allS3[],
-                  const fptype allS4[],
-                  const fptype allCOUP[],
-                  const double Ccoeff,
-                  fptype allvertexes[] );
+  linker_CI_VVSS1_0( const fptype allV1[],
+                     const fptype allV2[],
+                     const fptype allS3[],
+                     const fptype allS4[],
+                     const fptype allCOUP[],
+                     const double Ccoeff,
+                     fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
index 1c56a0ab14..e6ba72c45f 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
@@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1217][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f933000ec40> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7ff06d078c40> [1;30m[export_v4.py at line 6261][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -593,19 +593,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1589][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1613][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1614][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.122 s
+Wrote files for 10 helas calls in 0.121 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.143 s
+ALOHA: aloha creates 2 routines in  0.141 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.137 s
+ALOHA: aloha creates 4 routines in  0.140 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -642,9 +642,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.966s
-user	0m2.670s
-sys	0m0.293s
+real	0m2.986s
+user	0m2.651s
+sys	0m0.300s
 Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/HelAmps.cc
index 79486e92f0..426da5a2c2 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/HelAmps.cc
@@ -62,63 +62,122 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
     return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
index fe52e62418..f3c64c92d4 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc
@@ -332,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], -1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -346,10 +346,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -359,10 +359,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h
index f9d3884aea..d00f4b951b 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h
+++ b/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h
@@ -1036,64 +1036,119 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 
diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
index fb28c3393c..9662488371 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.122 s
+1 processes with 3 diagrams generated in 0.126 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -581,7 +581,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.142 s
+ALOHA: aloha creates 2 routines in  0.447 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -598,7 +598,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
 quit
 
-real	0m1.324s
-user	0m1.248s
+real	0m1.844s
+user	0m1.263s
 sys	0m0.069s
-Code generation completed in 1 seconds
+Code generation completed in 2 seconds
diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/HelAmps.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/HelAmps.cc
index 79486e92f0..426da5a2c2 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/HelAmps.cc
+++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/HelAmps.cc
@@ -62,63 +62,122 @@ namespace mg5amcCpu
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] )
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
   {
     return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] )
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
   {
     return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] )
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
   {
     return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
   }
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] )
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
   {
     return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
   }
 
   //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
 }
 #endif
diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc
index a33ad778ac..7869d6d251 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc
+++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc
@@ -332,10 +332,10 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 );
 
-      helas_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] );
 
       // Amplitude(s) for diagram number 1
-      helas_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], -1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -345,10 +345,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 3 ***
 
       // Wavefunction(s) for diagram number 2
-      helas_FFV1_1( w_fp[2], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 2
-      helas_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
@@ -357,10 +357,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 3 ***
 
       // Wavefunction(s) for diagram number 3
-      helas_FFV1_2( w_fp[3], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[1], -1.0, cIPD[0], cIPD[1], w_fp[4] );
 
       // Amplitude(s) for diagram number 3
-      helas_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], -1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473)
 #endif
diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h
index f9d3884aea..d00f4b951b 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h
+++ b/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h
@@ -1036,64 +1036,119 @@ namespace mg5amcCpu
 
 #ifndef MGONGPU_LINKER_HELAMPS
 
-#define helas_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
-#define helas_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
-#define helas_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
 
 #else
 
-#define helas_VVV1P0_1 linker_VVV1P0_1
-#define helas_FFV1_0 linker_FFV1_0
-#define helas_FFV1_1 linker_FFV1_1
-#define helas_FFV1_2 linker_FFV1_2
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_VVV1P0_1( const fptype allV2[],
-                   const fptype allV3[],
-                   const fptype allCOUP[],
-                   const double Ccoeff,
-                   const fptype M1,
-                   const fptype W1,
-                   fptype allV1[] );
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6]
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_0( const fptype allF1[],
-                 const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 fptype allvertexes[] );
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
   __device__ void
-  linker_FFV1_1( const fptype allF2[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M1,
-                 const fptype W1,
-                 fptype allF1[] );
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
 
   //--------------------------------------------------------------------------
 
-  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6]
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
   __device__ void
-  linker_FFV1_2( const fptype allF1[],
-                 const fptype allV3[],
-                 const fptype allCOUP[],
-                 const double Ccoeff,
-                 const fptype M2,
-                 const fptype W2,
-                 fptype allF2[] );
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
 
   //--------------------------------------------------------------------------
 

From b1f79a8215a5191063671f2ea9e7b01d11a82c9a Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 30 Aug 2024 07:14:45 +0200
Subject: [PATCH 34/50] [helas] rerun tput tests (now 120 including inlL,
 previously 102) on itscrd90 - all ok

STARTED  AT Thu Aug 29 09:00:35 PM CEST 2024
./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean
ENDED(1) AT Thu Aug 29 11:03:48 PM CEST 2024 [Status=0]
./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean
ENDED(2) AT Thu Aug 29 11:24:34 PM CEST 2024 [Status=0]
./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean
ENDED(3) AT Thu Aug 29 11:33:08 PM CEST 2024 [Status=0]
./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst
ENDED(4) AT Thu Aug 29 11:35:56 PM CEST 2024 [Status=0]
./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst
ENDED(5) AT Thu Aug 29 11:38:41 PM CEST 2024 [Status=0]
./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common
ENDED(6) AT Thu Aug 29 11:41:32 PM CEST 2024 [Status=0]
./tput/teeThroughputX.sh -mix -hrd -makej -susyggtt -susyggt1t1 -smeftggtttt -heftggbb -makeclean
ENDED(7) AT Fri Aug 30 12:12:36 AM CEST 2024 [Status=0]
./tput/teeThroughputX.sh -inlLonly -mix -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean
ENDED(8) AT Fri Aug 30 12:48:22 AM CEST 2024 [Status=0]

Note: build times are reduced by a factor of 2 to 3 in inlL with respect to inl0 for complex processes like ggttggg
----------------
tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
Preliminary build completed in 0d 00h 07m 12s
tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
Preliminary build completed in 0d 00h 14m 20s
----------------
tput/logs_ggttggg_mad/log_ggttggg_mad_f_inlL_hrd0.txt
Preliminary build completed in 0d 00h 05m 39s
tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
Preliminary build completed in 0d 00h 13m 34s
----------------
tput/logs_ggttggg_mad/log_ggttggg_mad_m_inlL_hrd0.txt
Preliminary build completed in 0d 00h 05m 55s
tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
Preliminary build completed in 0d 00h 14m 56s
----------------

Note also: there is a runtime performance slowdown of around 10% in inlL with respect to inl0, in both cuda and c++.
(I had previously observed that cuda seemed faster with inlL, but this was with a small grid! Using a large grid, cuda is also slower with inlL)

diff -u --color tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt  tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
 ------------------------------------------------
-Preliminary build completed in 0d 00h 07m 12s
+Preliminary build completed in 0d 00h 14m 20s
 ------------------------------------------------

(CUDA small grid, HELINL=L is 10% faster)
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.337724e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.338199e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.338376e+02                 )  sec^-1
-MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     2.243520 sec
-INFO: No Floating Point Exceptions have been reported
-     7,333,011,251      cycles                           #    2.895 GHz
-    16,571,702,127      instructions                     #    2.26  insn per cycle
-       2.591709636 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 1 256 1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.074025e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.074408e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.074613e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     2.427313 sec
+INFO: No Floating Point Exceptions have been reported
+     8,007,770,360      cycles                           #    2.905 GHz
+    17,844,373,075      instructions                     #    2.23  insn per cycle
+       2.813382822 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%

(CUDA large grid, HELINL=L is 10% slower)
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.489870e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.491766e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.491994e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.214624e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.216736e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.217011e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     4.301800 sec
+TOTAL       :     4.008082 sec
 INFO: No Floating Point Exceptions have been reported
-    13,363,583,535      cycles                           #    2.902 GHz
-    29,144,223,391      instructions                     #    2.18  insn per cycle
-       4.658949907 seconds time elapsed
+    12,658,170,825      cycles                           #    2.916 GHz
+    27,773,386,314      instructions                     #    2.19  insn per cycle
+       4.398692801 seconds time elapsed

(C++, HELINL=L is 10% slower)
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.478898e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.479341e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.479341e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.848619e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.849166e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.849166e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.518979 sec
+TOTAL       :     1.373871 sec
 INFO: No Floating Point Exceptions have been reported
-     4,109,801,969      cycles                           #    2.699 GHz
-     9,072,472,376      instructions                     #    2.21  insn per cycle
-       1.523113813 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:94048) (512y:   91) (512z:    0)
+     3,731,717,521      cycles                           #    2.710 GHz
+     8,514,052,827      instructions                     #    2.28  insn per cycle
+       1.377919646 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80619) (512y:   89) (512z:    0)
---
 .../log_eemumu_mad_d_inl0_hrd0.txt            |  90 ++++---
 .../log_eemumu_mad_d_inl0_hrd0_bridge.txt     |  90 ++++---
 .../log_eemumu_mad_d_inl0_hrd0_common.txt     |  90 ++++---
 .../log_eemumu_mad_d_inl0_hrd0_curhst.txt     |  90 ++++---
 .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt     |  90 ++++---
 .../log_eemumu_mad_d_inl0_hrd1.txt            |  90 ++++---
 .../log_eemumu_mad_d_inl1_hrd0.txt            |  90 ++++---
 .../log_eemumu_mad_d_inl1_hrd1.txt            |  90 ++++---
 .../log_eemumu_mad_d_inlL_hrd0.txt            | 229 ++++++++++++++++
 .../log_eemumu_mad_f_inl0_hrd0.txt            |  90 ++++---
 .../log_eemumu_mad_f_inl0_hrd0_bridge.txt     |  90 ++++---
 .../log_eemumu_mad_f_inl0_hrd0_common.txt     |  90 ++++---
 .../log_eemumu_mad_f_inl0_hrd0_curhst.txt     |  90 ++++---
 .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt     |  90 ++++---
 .../log_eemumu_mad_f_inl0_hrd1.txt            |  90 ++++---
 .../log_eemumu_mad_f_inl1_hrd0.txt            |  90 ++++---
 .../log_eemumu_mad_f_inl1_hrd1.txt            |  90 ++++---
 .../log_eemumu_mad_f_inlL_hrd0.txt            | 229 ++++++++++++++++
 .../log_eemumu_mad_m_inl0_hrd0.txt            |  90 ++++---
 .../log_eemumu_mad_m_inl0_hrd1.txt            |  90 ++++---
 .../log_eemumu_mad_m_inlL_hrd0.txt            | 229 ++++++++++++++++
 .../log_ggtt_mad_d_inl0_hrd0.txt              |  88 +++----
 .../log_ggtt_mad_d_inl0_hrd0_bridge.txt       |  90 ++++---
 .../log_ggtt_mad_d_inl0_hrd0_common.txt       |  90 ++++---
 .../log_ggtt_mad_d_inl0_hrd0_curhst.txt       |  90 ++++---
 .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt       |  90 ++++---
 .../log_ggtt_mad_d_inl0_hrd1.txt              |  90 ++++---
 .../log_ggtt_mad_d_inl1_hrd0.txt              |  88 +++----
 .../log_ggtt_mad_d_inl1_hrd1.txt              |  90 ++++---
 .../log_ggtt_mad_d_inlL_hrd0.txt              |  88 +++----
 .../log_ggtt_mad_f_inl0_hrd0.txt              |  90 ++++---
 .../log_ggtt_mad_f_inl0_hrd0_bridge.txt       |  90 ++++---
 .../log_ggtt_mad_f_inl0_hrd0_common.txt       |  90 ++++---
 .../log_ggtt_mad_f_inl0_hrd0_curhst.txt       |  90 ++++---
 .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt       |  90 ++++---
 .../log_ggtt_mad_f_inl0_hrd1.txt              |  90 ++++---
 .../log_ggtt_mad_f_inl1_hrd0.txt              |  90 ++++---
 .../log_ggtt_mad_f_inl1_hrd1.txt              |  90 ++++---
 .../log_ggtt_mad_f_inlL_hrd0.txt              | 229 ++++++++++++++++
 .../log_ggtt_mad_m_inl0_hrd0.txt              |  90 ++++---
 .../log_ggtt_mad_m_inl0_hrd1.txt              |  90 ++++---
 .../log_ggtt_mad_m_inlL_hrd0.txt              | 229 ++++++++++++++++
 .../log_ggttg_mad_d_inl0_hrd0.txt             | 104 ++++----
 .../log_ggttg_mad_d_inl0_hrd0_bridge.txt      | 104 ++++----
 .../log_ggttg_mad_d_inl0_hrd1.txt             | 104 ++++----
 .../log_ggttg_mad_d_inlL_hrd0.txt             | 244 ++++++++++++++++++
 .../log_ggttg_mad_f_inl0_hrd0.txt             | 104 ++++----
 .../log_ggttg_mad_f_inl0_hrd0_bridge.txt      | 104 ++++----
 .../log_ggttg_mad_f_inl0_hrd1.txt             | 104 ++++----
 .../log_ggttg_mad_f_inlL_hrd0.txt             | 244 ++++++++++++++++++
 .../log_ggttg_mad_m_inl0_hrd0.txt             | 104 ++++----
 .../log_ggttg_mad_m_inl0_hrd1.txt             | 104 ++++----
 .../log_ggttg_mad_m_inlL_hrd0.txt             | 244 ++++++++++++++++++
 .../log_ggttgg_mad_d_inl0_hrd0.txt            | 104 ++++----
 .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt     | 104 ++++----
 .../log_ggttgg_mad_d_inl0_hrd0_common.txt     | 104 ++++----
 .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt     | 104 ++++----
 .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt     | 104 ++++----
 .../log_ggttgg_mad_d_inl0_hrd1.txt            | 104 ++++----
 .../log_ggttgg_mad_d_inl1_hrd0.txt            | 104 ++++----
 .../log_ggttgg_mad_d_inl1_hrd1.txt            | 104 ++++----
 .../log_ggttgg_mad_d_inlL_hrd0.txt            | 244 ++++++++++++++++++
 .../log_ggttgg_mad_f_inl0_hrd0.txt            | 104 ++++----
 .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt     | 104 ++++----
 .../log_ggttgg_mad_f_inl0_hrd0_common.txt     | 104 ++++----
 .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt     | 104 ++++----
 .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt     | 104 ++++----
 .../log_ggttgg_mad_f_inl0_hrd1.txt            | 104 ++++----
 .../log_ggttgg_mad_f_inl1_hrd0.txt            | 104 ++++----
 .../log_ggttgg_mad_f_inl1_hrd1.txt            | 104 ++++----
 .../log_ggttgg_mad_f_inlL_hrd0.txt            | 244 ++++++++++++++++++
 .../log_ggttgg_mad_m_inl0_hrd0.txt            | 104 ++++----
 .../log_ggttgg_mad_m_inl0_hrd1.txt            | 104 ++++----
 .../log_ggttgg_mad_m_inlL_hrd0.txt            | 244 ++++++++++++++++++
 .../log_ggttggg_mad_d_inl0_hrd0.txt           | 102 ++++----
 .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt    | 104 ++++----
 .../log_ggttggg_mad_d_inl0_hrd1.txt           | 104 ++++----
 .../log_ggttggg_mad_d_inlL_hrd0.txt           | 102 ++++----
 .../log_ggttggg_mad_f_inl0_hrd0.txt           | 104 ++++----
 .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt    | 104 ++++----
 .../log_ggttggg_mad_f_inl0_hrd1.txt           | 104 ++++----
 .../log_ggttggg_mad_f_inlL_hrd0.txt           | 244 ++++++++++++++++++
 .../log_ggttggg_mad_m_inl0_hrd0.txt           | 104 ++++----
 .../log_ggttggg_mad_m_inl0_hrd1.txt           | 104 ++++----
 .../log_ggttggg_mad_m_inlL_hrd0.txt           | 244 ++++++++++++++++++
 .../log_gqttq_mad_d_inl0_hrd0.txt             | 104 ++++----
 .../log_gqttq_mad_d_inl0_hrd0_bridge.txt      | 104 ++++----
 .../log_gqttq_mad_d_inl0_hrd1.txt             | 104 ++++----
 .../log_gqttq_mad_d_inlL_hrd0.txt             | 244 ++++++++++++++++++
 .../log_gqttq_mad_f_inl0_hrd0.txt             | 104 ++++----
 .../log_gqttq_mad_f_inl0_hrd0_bridge.txt      | 104 ++++----
 .../log_gqttq_mad_f_inl0_hrd1.txt             | 104 ++++----
 .../log_gqttq_mad_f_inlL_hrd0.txt             | 244 ++++++++++++++++++
 .../log_gqttq_mad_m_inl0_hrd0.txt             | 104 ++++----
 .../log_gqttq_mad_m_inl0_hrd1.txt             | 104 ++++----
 .../log_gqttq_mad_m_inlL_hrd0.txt             | 244 ++++++++++++++++++
 .../log_heftggbb_mad_d_inl0_hrd0.txt          |  90 ++++---
 .../log_heftggbb_mad_d_inl0_hrd1.txt          |  90 ++++---
 .../log_heftggbb_mad_f_inl0_hrd0.txt          |  90 ++++---
 .../log_heftggbb_mad_f_inl0_hrd1.txt          |  90 ++++---
 .../log_heftggbb_mad_m_inl0_hrd0.txt          |  90 ++++---
 .../log_heftggbb_mad_m_inl0_hrd1.txt          |  90 ++++---
 .../log_smeftggtttt_mad_d_inl0_hrd0.txt       | 104 ++++----
 .../log_smeftggtttt_mad_d_inl0_hrd1.txt       | 104 ++++----
 .../log_smeftggtttt_mad_f_inl0_hrd0.txt       | 104 ++++----
 .../log_smeftggtttt_mad_f_inl0_hrd1.txt       | 104 ++++----
 .../log_smeftggtttt_mad_m_inl0_hrd0.txt       | 104 ++++----
 .../log_smeftggtttt_mad_m_inl0_hrd1.txt       | 104 ++++----
 .../log_susyggt1t1_mad_d_inl0_hrd0.txt        |  90 ++++---
 .../log_susyggt1t1_mad_d_inl0_hrd1.txt        |  90 ++++---
 .../log_susyggt1t1_mad_f_inl0_hrd0.txt        |  90 ++++---
 .../log_susyggt1t1_mad_f_inl0_hrd1.txt        |  90 ++++---
 .../log_susyggt1t1_mad_m_inl0_hrd0.txt        |  90 ++++---
 .../log_susyggt1t1_mad_m_inl0_hrd1.txt        |  90 ++++---
 .../log_susyggtt_mad_d_inl0_hrd0.txt          |  90 ++++---
 .../log_susyggtt_mad_d_inl0_hrd1.txt          |  90 ++++---
 .../log_susyggtt_mad_f_inl0_hrd0.txt          |  90 ++++---
 .../log_susyggtt_mad_f_inl0_hrd1.txt          |  90 ++++---
 .../log_susyggtt_mad_m_inl0_hrd0.txt          |  90 ++++---
 .../log_susyggtt_mad_m_inl0_hrd1.txt          |  90 ++++---
 120 files changed, 9045 insertions(+), 4820 deletions(-)
 create mode 100644 epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inlL_hrd0.txt
 create mode 100644 epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inlL_hrd0.txt

diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
index ad26491862..d8009f6b3e 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 15s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_19:47:50
+DATE: 2024-08-29_22:45:26
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.598959e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.638501e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.177835e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.803809e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.717028e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.166742e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.698559 sec
+TOTAL       :     0.901125 sec
 INFO: No Floating Point Exceptions have been reported
-     2,601,897,002      cycles                           #    2.808 GHz                    
-     4,040,507,104      instructions                     #    1.55  insn per cycle         
-       0.999350103 seconds time elapsed
+     2,657,428,250      cycles                           #    2.848 GHz                    
+     4,095,613,701      instructions                     #    1.54  insn per cycle         
+       1.213739460 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.054108e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.229313e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.229313e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.028409e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.198012e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.198012e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.402837 sec
+TOTAL       :     6.530303 sec
 INFO: No Floating Point Exceptions have been reported
-    19,233,855,272      cycles                           #    3.000 GHz                    
-    46,180,507,769      instructions                     #    2.40  insn per cycle         
-       6.412153445 seconds time elapsed
+    19,075,896,141      cycles                           #    2.919 GHz                    
+    46,074,311,860      instructions                     #    2.42  insn per cycle         
+       6.535837772 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.601848e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.093713e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.093713e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.570712e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.047478e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.047478e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.363298 sec
+TOTAL       :     4.420809 sec
 INFO: No Floating Point Exceptions have been reported
-    13,100,720,322      cycles                           #    2.997 GHz                    
-    31,716,075,564      instructions                     #    2.42  insn per cycle         
-       4.372588931 seconds time elapsed
+    12,935,671,986      cycles                           #    2.923 GHz                    
+    31,611,096,814      instructions                     #    2.44  insn per cycle         
+       4.426216014 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.042973e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.858628e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.858628e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.982246e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.749049e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.749049e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.509207 sec
+TOTAL       :     3.584392 sec
 INFO: No Floating Point Exceptions have been reported
-    10,205,028,097      cycles                           #    2.901 GHz                    
-    19,707,283,623      instructions                     #    1.93  insn per cycle         
-       3.518316321 seconds time elapsed
+    10,005,997,843      cycles                           #    2.788 GHz                    
+    19,602,170,267      instructions                     #    1.96  insn per cycle         
+       3.589884459 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1946) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.068954e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.924439e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.924439e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.034742e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.851215e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.851215e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.473859 sec
+TOTAL       :     3.501989 sec
 INFO: No Floating Point Exceptions have been reported
-    10,004,130,884      cycles                           #    2.873 GHz                    
-    19,357,111,804      instructions                     #    1.93  insn per cycle         
-       3.483068816 seconds time elapsed
+     9,776,595,596      cycles                           #    2.788 GHz                    
+    19,251,276,525      instructions                     #    1.97  insn per cycle         
+       3.507337404 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1685) (512y:  178) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.804457e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.421604e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.421604e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.721753e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.269385e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.269385e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.921670 sec
+TOTAL       :     4.065497 sec
 INFO: No Floating Point Exceptions have been reported
-     8,766,336,363      cycles                           #    2.231 GHz                    
-    15,830,799,810      instructions                     #    1.81  insn per cycle         
-       3.930866073 seconds time elapsed
+     8,584,082,989      cycles                           #    2.109 GHz                    
+    15,723,059,479      instructions                     #    1.83  insn per cycle         
+       4.070951861 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  880) (512y:  156) (512z: 1257)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt
index 254ccc5cd6..15c86e64f8 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 13s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:16:34
+DATE: 2024-08-29_23:27:07
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.859786e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.167324e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.167324e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.624887e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.702462e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.702462e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     2.182775 sec
+TOTAL       :     2.264047 sec
 INFO: No Floating Point Exceptions have been reported
-     7,222,143,773      cycles                           #    2.974 GHz                    
-    12,988,458,578      instructions                     #    1.80  insn per cycle         
-       2.484589357 seconds time elapsed
+     7,265,794,550      cycles                           #    2.898 GHz                    
+    13,119,795,658      instructions                     #    1.81  insn per cycle         
+       2.566395516 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -91,15 +95,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.023014e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.186587e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.186587e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.965625e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.154434e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.154434e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.792659 sec
+TOTAL       :     6.932007 sec
 INFO: No Floating Point Exceptions have been reported
-    20,463,079,955      cycles                           #    3.008 GHz                    
-    46,412,955,093      instructions                     #    2.27  insn per cycle         
-       6.804041518 seconds time elapsed
+    20,279,302,832      cycles                           #    2.923 GHz                    
+    46,300,903,612      instructions                     #    2.28  insn per cycle         
+       6.938668002 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -121,15 +125,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.536442e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.970461e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.970461e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.493239e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.913977e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.913977e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.741441 sec
+TOTAL       :     4.834619 sec
 INFO: No Floating Point Exceptions have been reported
-    14,332,452,862      cycles                           #    3.016 GHz                    
-    32,573,923,419      instructions                     #    2.27  insn per cycle         
-       4.753137415 seconds time elapsed
+    14,076,513,939      cycles                           #    2.908 GHz                    
+    32,453,787,450      instructions                     #    2.31  insn per cycle         
+       4.841215294 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -151,15 +155,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.834595e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.507335e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.507335e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.862502e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.532080e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.532080e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.104610 sec
+TOTAL       :     4.005847 sec
 INFO: No Floating Point Exceptions have been reported
-    11,547,104,567      cycles                           #    2.806 GHz                    
-    21,093,610,719      instructions                     #    1.83  insn per cycle         
-       4.116807687 seconds time elapsed
+    11,211,606,490      cycles                           #    2.795 GHz                    
+    20,962,455,249      instructions                     #    1.87  insn per cycle         
+       4.012539970 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1946) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -181,15 +185,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.917747e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.629096e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.629096e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.896245e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.585584e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.585584e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.937807 sec
+TOTAL       :     3.947050 sec
 INFO: No Floating Point Exceptions have been reported
-    11,279,300,088      cycles                           #    2.856 GHz                    
-    20,732,054,777      instructions                     #    1.84  insn per cycle         
-       3.949582750 seconds time elapsed
+    11,037,536,290      cycles                           #    2.793 GHz                    
+    20,609,974,645      instructions                     #    1.87  insn per cycle         
+       3.953712362 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1685) (512y:  178) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -211,15 +215,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.634373e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.159831e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.159831e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.615205e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.087554e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.087554e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.550735 sec
+TOTAL       :     4.531367 sec
 INFO: No Floating Point Exceptions have been reported
-    10,336,377,696      cycles                           #    2.266 GHz                    
-    17,023,763,380      instructions                     #    1.65  insn per cycle         
-       4.562764893 seconds time elapsed
+     9,875,108,333      cycles                           #    2.177 GHz                    
+    16,869,911,181      instructions                     #    1.71  insn per cycle         
+       4.538028145 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  880) (512y:  156) (512z: 1257)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt
index a17dc8d37a..92ef4f6f2f 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 01s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:28:00
+DATE: 2024-08-29_23:38:45
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.117423e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.844085e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.131938e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.808918e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.668095e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.116734e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     1.358559 sec
+TOTAL       :     1.369794 sec
 INFO: No Floating Point Exceptions have been reported
-     4,616,681,568      cycles                           #    2.947 GHz                    
-     7,101,035,160      instructions                     #    1.54  insn per cycle         
-       1.643879361 seconds time elapsed
+     4,608,699,021      cycles                           #    2.885 GHz                    
+     7,094,580,310      instructions                     #    1.54  insn per cycle         
+       1.655641768 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.047167e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.219441e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.219441e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.028121e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.196836e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.196836e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     6.877625 sec
+TOTAL       :     6.905360 sec
 INFO: No Floating Point Exceptions have been reported
-    20,474,853,896      cycles                           #    2.975 GHz                    
-    46,476,031,399      instructions                     #    2.27  insn per cycle         
-       6.883195189 seconds time elapsed
+    20,204,972,387      cycles                           #    2.924 GHz                    
+    46,176,989,685      instructions                     #    2.29  insn per cycle         
+       6.910743496 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.613543e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.104302e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.104302e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.559907e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.028662e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.028662e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     4.762997 sec
+TOTAL       :     4.821292 sec
 INFO: No Floating Point Exceptions have been reported
-    14,341,567,999      cycles                           #    3.008 GHz                    
-    31,906,796,447      instructions                     #    2.22  insn per cycle         
-       4.768768263 seconds time elapsed
+    14,003,649,408      cycles                           #    2.902 GHz                    
+    31,613,857,025      instructions                     #    2.26  insn per cycle         
+       4.826776329 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.037523e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.848398e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.848398e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.980860e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.738326e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.738326e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     3.928675 sec
+TOTAL       :     3.957963 sec
 INFO: No Floating Point Exceptions have been reported
-    11,431,967,131      cycles                           #    2.907 GHz                    
-    19,749,163,356      instructions                     #    1.73  insn per cycle         
-       3.934544865 seconds time elapsed
+    11,113,367,910      cycles                           #    2.805 GHz                    
+    19,502,073,857      instructions                     #    1.75  insn per cycle         
+       3.963320231 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1946) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.057561e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.903205e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.903205e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.009415e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.801431e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.801431e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     3.914555 sec
+TOTAL       :     3.927255 sec
 INFO: No Floating Point Exceptions have been reported
-    11,301,789,336      cycles                           #    2.884 GHz                    
-    19,198,978,685      instructions                     #    1.70  insn per cycle         
-       3.919932247 seconds time elapsed
+    10,925,795,299      cycles                           #    2.779 GHz                    
+    18,950,076,288      instructions                     #    1.73  insn per cycle         
+       3.932774858 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1685) (512y:  178) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.792077e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.384424e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.384424e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.705280e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.276454e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.276454e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     4.355139 sec
+TOTAL       :     4.480076 sec
 INFO: No Floating Point Exceptions have been reported
-     9,975,675,333      cycles                           #    2.288 GHz                    
-    15,643,574,075      instructions                     #    1.57  insn per cycle         
-       4.360684158 seconds time elapsed
+     9,807,871,817      cycles                           #    2.187 GHz                    
+    15,426,367,312      instructions                     #    1.57  insn per cycle         
+       4.485479236 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  880) (512y:  156) (512z: 1257)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt
index 02f69b4d1c..10b54c1ef0 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 00s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:25:18
+DATE: 2024-08-29_23:35:59
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.161167e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.790408e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.166295e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.797696e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.680633e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.151980e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.968841 sec
+TOTAL       :     0.987290 sec
 INFO: No Floating Point Exceptions have been reported
-     3,539,663,050      cycles                           #    2.958 GHz                    
-     6,992,486,553      instructions                     #    1.98  insn per cycle         
-       1.255291189 seconds time elapsed
+     3,494,786,482      cycles                           #    2.873 GHz                    
+     6,971,888,639      instructions                     #    1.99  insn per cycle         
+       1.273208272 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.054864e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.230420e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.230420e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.029433e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.198441e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.198441e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.368076 sec
+TOTAL       :     6.521930 sec
 INFO: No Floating Point Exceptions have been reported
-    19,096,334,706      cycles                           #    2.997 GHz                    
-    46,076,716,123      instructions                     #    2.41  insn per cycle         
-       6.373662191 seconds time elapsed
+    19,054,645,226      cycles                           #    2.920 GHz                    
+    46,074,013,678      instructions                     #    2.42  insn per cycle         
+       6.527229224 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.601324e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.083048e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.083048e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.573928e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.046992e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.046992e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.335443 sec
+TOTAL       :     4.412728 sec
 INFO: No Floating Point Exceptions have been reported
-    12,960,942,150      cycles                           #    2.986 GHz                    
-    31,610,247,350      instructions                     #    2.44  insn per cycle         
-       4.340962885 seconds time elapsed
+    12,916,865,926      cycles                           #    2.924 GHz                    
+    31,611,168,331      instructions                     #    2.45  insn per cycle         
+       4.418232205 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.037265e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.842019e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.842019e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.976115e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.748052e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.748052e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.487108 sec
+TOTAL       :     3.594982 sec
 INFO: No Floating Point Exceptions have been reported
-    10,064,000,379      cycles                           #    2.882 GHz                    
-    19,599,635,012      instructions                     #    1.95  insn per cycle         
-       3.492608891 seconds time elapsed
+    10,006,568,639      cycles                           #    2.780 GHz                    
+    19,599,883,322      instructions                     #    1.96  insn per cycle         
+       3.600617857 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1946) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.083703e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.929723e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.929723e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.035014e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.847591e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.847591e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.417011 sec
+TOTAL       :     3.500601 sec
 INFO: No Floating Point Exceptions have been reported
-     9,860,886,386      cycles                           #    2.882 GHz                    
-    19,261,098,945      instructions                     #    1.95  insn per cycle         
-       3.422241820 seconds time elapsed
+     9,800,712,040      cycles                           #    2.796 GHz                    
+    19,261,863,073      instructions                     #    1.97  insn per cycle         
+       3.506017349 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1685) (512y:  178) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.806629e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.401308e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.401308e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.727394e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.281204e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.281204e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.881256 sec
+TOTAL       :     4.053717 sec
 INFO: No Floating Point Exceptions have been reported
-     8,602,524,027      cycles                           #    2.214 GHz                    
-    15,722,205,670      instructions                     #    1.83  insn per cycle         
-       3.886723200 seconds time elapsed
+     8,568,345,184      cycles                           #    2.111 GHz                    
+    15,722,985,461      instructions                     #    1.84  insn per cycle         
+       4.059101293 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  880) (512y:  156) (512z: 1257)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt
index 35f9b1d01f..fe5153c071 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 01s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:22:33
+DATE: 2024-08-29_23:33:12
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -50,15 +54,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.201911e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.800503e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.039847e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.065171e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.636973e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.050281e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     1.856881 sec
+TOTAL       :     1.887176 sec
 INFO: No Floating Point Exceptions have been reported
-     6,224,640,386      cycles                           #    2.971 GHz                    
-    11,427,865,713      instructions                     #    1.84  insn per cycle         
-       2.153600888 seconds time elapsed
+     6,145,978,060      cycles                           #    2.903 GHz                    
+    11,435,822,809      instructions                     #    1.86  insn per cycle         
+       2.173027806 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
@@ -84,15 +88,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.044821e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.217145e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.217145e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.029479e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.199057e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.199057e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.426882 sec
+TOTAL       :     6.523129 sec
 INFO: No Floating Point Exceptions have been reported
-    19,111,682,358      cycles                           #    2.975 GHz                    
-    46,077,003,649      instructions                     #    2.41  insn per cycle         
-       6.432401292 seconds time elapsed
+    19,067,053,861      cycles                           #    2.921 GHz                    
+    46,077,405,536      instructions                     #    2.42  insn per cycle         
+       6.528437903 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -113,15 +117,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.618749e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.109823e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.109823e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.548471e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.011932e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.011932e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.289727 sec
+TOTAL       :     4.480294 sec
 INFO: No Floating Point Exceptions have been reported
-    12,954,885,068      cycles                           #    3.017 GHz                    
-    31,610,318,935      instructions                     #    2.44  insn per cycle         
-       4.295110036 seconds time elapsed
+    12,970,362,518      cycles                           #    2.896 GHz                    
+    31,615,405,279      instructions                     #    2.44  insn per cycle         
+       4.485805448 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -142,15 +146,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.027068e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.831891e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.831891e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.990807e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.763259e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.763259e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.501218 sec
+TOTAL       :     3.568074 sec
 INFO: No Floating Point Exceptions have been reported
-    10,084,953,651      cycles                           #    2.877 GHz                    
-    19,599,538,271      instructions                     #    1.94  insn per cycle         
-       3.506570863 seconds time elapsed
+     9,997,994,130      cycles                           #    2.799 GHz                    
+    19,601,663,150      instructions                     #    1.96  insn per cycle         
+       3.573387334 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1946) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -171,15 +175,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.095436e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.953376e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.953376e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.031513e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.838493e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.838493e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.399978 sec
+TOTAL       :     3.506716 sec
 INFO: No Floating Point Exceptions have been reported
-     9,825,140,072      cycles                           #    2.886 GHz                    
-    19,248,188,821      instructions                     #    1.96  insn per cycle         
-       3.405318176 seconds time elapsed
+     9,800,646,915      cycles                           #    2.791 GHz                    
+    19,248,975,591      instructions                     #    1.96  insn per cycle         
+       3.512205386 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1685) (512y:  178) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.764156e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.337626e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.337626e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.737151e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.291381e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.291381e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.973951 sec
+TOTAL       :     4.030896 sec
 INFO: No Floating Point Exceptions have been reported
-     8,632,225,098      cycles                           #    2.170 GHz                    
-    15,724,542,893      instructions                     #    1.82  insn per cycle         
-       3.979226146 seconds time elapsed
+     8,561,312,861      cycles                           #    2.122 GHz                    
+    15,725,090,197      instructions                     #    1.84  insn per cycle         
+       4.036222457 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  880) (512y:  156) (512z: 1257)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt
index 30013486b3..bf5eae53fe 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 49s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_19:48:21
+DATE: 2024-08-29_22:45:57
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.631857e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.952875e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.229430e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.814972e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.742109e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.212342e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.661237 sec
+TOTAL       :     0.677183 sec
 INFO: No Floating Point Exceptions have been reported
-     2,635,614,506      cycles                           #    2.952 GHz                    
-     4,105,447,914      instructions                     #    1.56  insn per cycle         
-       0.952322039 seconds time elapsed
+     2,613,090,444      cycles                           #    2.872 GHz                    
+     4,047,105,274      instructions                     #    1.55  insn per cycle         
+       0.969507477 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.051765e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.227570e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.227570e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.029099e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.198310e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.198310e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.414969 sec
+TOTAL       :     6.524250 sec
 INFO: No Floating Point Exceptions have been reported
-    19,212,287,097      cycles                           #    2.991 GHz                    
-    46,135,858,785      instructions                     #    2.40  insn per cycle         
-       6.423899634 seconds time elapsed
+    19,046,558,619      cycles                           #    2.917 GHz                    
+    46,035,154,416      instructions                     #    2.42  insn per cycle         
+       6.529531773 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  452) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.601077e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.094081e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.094081e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.564780e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.031977e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.031977e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.367872 sec
+TOTAL       :     4.434953 sec
 INFO: No Floating Point Exceptions have been reported
-    13,124,994,280      cycles                           #    3.000 GHz                    
-    31,690,002,602      instructions                     #    2.41  insn per cycle         
-       4.377128729 seconds time elapsed
+    12,896,963,599      cycles                           #    2.905 GHz                    
+    31,585,225,315      instructions                     #    2.45  insn per cycle         
+       4.440300371 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1650) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.022628e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.826530e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.826530e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.967682e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.721566e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.721566e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.545071 sec
+TOTAL       :     3.607220 sec
 INFO: No Floating Point Exceptions have been reported
-    10,210,134,759      cycles                           #    2.873 GHz                    
-    19,686,352,650      instructions                     #    1.93  insn per cycle         
-       3.554081422 seconds time elapsed
+     9,997,689,808      cycles                           #    2.768 GHz                    
+    19,580,598,841      instructions                     #    1.96  insn per cycle         
+       3.612721980 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1929) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.045349e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.884198e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.884198e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.015746e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.813424e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.813424e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.513122 sec
+TOTAL       :     3.532733 sec
 INFO: No Floating Point Exceptions have been reported
-    10,000,248,812      cycles                           #    2.840 GHz                    
-    19,370,551,089      instructions                     #    1.94  insn per cycle         
-       3.521931882 seconds time elapsed
+     9,811,967,265      cycles                           #    2.774 GHz                    
+    19,264,271,138      instructions                     #    1.96  insn per cycle         
+       3.538120513 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1670) (512y:  178) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.856445e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.503167e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.503167e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.763379e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.340075e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.340075e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.821454 sec
+TOTAL       :     3.978075 sec
 INFO: No Floating Point Exceptions have been reported
-     8,619,394,582      cycles                           #    2.251 GHz                    
-    15,699,269,615      instructions                     #    1.82  insn per cycle         
-       3.830496732 seconds time elapsed
+     8,415,239,556      cycles                           #    2.113 GHz                    
+    15,592,978,303      instructions                     #    1.85  insn per cycle         
+       3.983452996 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  866) (512y:  156) (512z: 1237)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt
index 012009e54a..21ee791ce0 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 39s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:07:19
+DATE: 2024-08-29_23:17:37
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.604046e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.930880e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.176471e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.492787e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.621744e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.179910e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.659931 sec
+TOTAL       :     0.685451 sec
 INFO: No Floating Point Exceptions have been reported
-     2,627,383,079      cycles                           #    2.945 GHz                    
-     4,093,880,816      instructions                     #    1.56  insn per cycle         
-       0.951439392 seconds time elapsed
+     2,651,264,313      cycles                           #    2.879 GHz                    
+     4,174,948,747      instructions                     #    1.57  insn per cycle         
+       0.978087360 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.646087e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.119341e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.119341e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.600183e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.050071e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.050071e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.251791 sec
+TOTAL       :     4.342486 sec
 INFO: No Floating Point Exceptions have been reported
-    12,834,346,286      cycles                           #    3.012 GHz                    
-    32,589,275,830      instructions                     #    2.54  insn per cycle         
-       4.261338656 seconds time elapsed
+    12,690,680,268      cycles                           #    2.919 GHz                    
+    32,481,885,780      instructions                     #    2.56  insn per cycle         
+       4.348166228 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  281) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.060473e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.955935e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.955935e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.021131e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.876235e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.876235e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.488809 sec
+TOTAL       :     3.528970 sec
 INFO: No Floating Point Exceptions have been reported
-    10,533,405,751      cycles                           #    3.012 GHz                    
-    24,716,100,998      instructions                     #    2.35  insn per cycle         
-       3.498417147 seconds time elapsed
+    10,310,937,032      cycles                           #    2.918 GHz                    
+    24,600,387,185      instructions                     #    2.39  insn per cycle         
+       3.534503426 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.261794e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.343751e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.343751e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.215463e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.225023e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.225023e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.211208 sec
+TOTAL       :     3.252959 sec
 INFO: No Floating Point Exceptions have been reported
-     9,296,707,178      cycles                           #    2.887 GHz                    
-    17,025,233,631      instructions                     #    1.83  insn per cycle         
-       3.220709148 seconds time elapsed
+     9,082,671,735      cycles                           #    2.788 GHz                    
+    16,914,669,530      instructions                     #    1.86  insn per cycle         
+       3.258703580 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1608) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.333155e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.462746e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.462746e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.291847e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.370167e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.370167e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.127002 sec
+TOTAL       :     3.155252 sec
 INFO: No Floating Point Exceptions have been reported
-     9,070,042,536      cycles                           #    2.893 GHz                    
-    16,440,168,447      instructions                     #    1.81  insn per cycle         
-       3.136632933 seconds time elapsed
+     8,860,640,460      cycles                           #    2.804 GHz                    
+    16,337,302,677      instructions                     #    1.84  insn per cycle         
+       3.161006597 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1344) (512y:  139) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.025516e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.816401e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.816401e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.936422e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.656528e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.656528e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.537864 sec
+TOTAL       :     3.659819 sec
 INFO: No Floating Point Exceptions have been reported
-     8,060,468,675      cycles                           #    2.273 GHz                    
-    14,674,271,295      instructions                     #    1.82  insn per cycle         
-       3.547452410 seconds time elapsed
+     7,888,827,724      cycles                           #    2.154 GHz                    
+    14,564,894,802      instructions                     #    1.85  insn per cycle         
+       3.665214653 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  990) (512y:  158) (512z:  954)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt
index 6698342434..7a6406d0c4 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 26s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:07:46
+DATE: 2024-08-29_23:18:03
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.562157e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.979811e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.228825e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.493801e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.641626e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.214265e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.660029 sec
+TOTAL       :     0.687800 sec
 INFO: No Floating Point Exceptions have been reported
-     2,629,191,587      cycles                           #    2.942 GHz                    
-     4,053,968,750      instructions                     #    1.54  insn per cycle         
-       0.953306046 seconds time elapsed
+     2,691,894,772      cycles                           #    2.880 GHz                    
+     4,100,143,973      instructions                     #    1.52  insn per cycle         
+       0.992202496 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.156529e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.042455e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.042455e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.096482e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.950388e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.950388e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.343977 sec
+TOTAL       :     3.412152 sec
 INFO: No Floating Point Exceptions have been reported
-    10,082,768,824      cycles                           #    3.008 GHz                    
-    25,523,612,333      instructions                     #    2.53  insn per cycle         
-       3.352820230 seconds time elapsed
+     9,976,603,472      cycles                           #    2.921 GHz                    
+    25,417,126,470      instructions                     #    2.55  insn per cycle         
+       3.417418960 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  236) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.385757e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.677774e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.677774e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.373808e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.642763e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.642763e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.073965 sec
+TOTAL       :     3.066171 sec
 INFO: No Floating Point Exceptions have been reported
-     9,151,066,373      cycles                           #    2.969 GHz                    
-    21,519,389,474      instructions                     #    2.35  insn per cycle         
-       3.083295145 seconds time elapsed
+     8,977,198,886      cycles                           #    2.923 GHz                    
+    21,409,395,242      instructions                     #    2.38  insn per cycle         
+       3.072036826 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1100) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.361878e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.558423e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.558423e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.352048e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.507087e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.507087e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.100634 sec
+TOTAL       :     3.082500 sec
 INFO: No Floating Point Exceptions have been reported
-     8,837,735,013      cycles                           #    2.843 GHz                    
-    15,972,170,074      instructions                     #    1.81  insn per cycle         
-       3.110024553 seconds time elapsed
+     8,651,955,812      cycles                           #    2.802 GHz                    
+    15,864,616,657      instructions                     #    1.83  insn per cycle         
+       3.088272692 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1481) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.456785e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.751546e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.751546e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.417412e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.673255e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.673255e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     2.990911 sec
+TOTAL       :     3.013451 sec
 INFO: No Floating Point Exceptions have been reported
-     8,652,752,906      cycles                           #    2.885 GHz                    
-    15,679,245,875      instructions                     #    1.81  insn per cycle         
-       3.000632003 seconds time elapsed
+     8,445,458,225      cycles                           #    2.799 GHz                    
+    15,571,912,210      instructions                     #    1.84  insn per cycle         
+       3.018940474 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1256) (512y:  141) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.146098e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.052577e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.052577e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.055814e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.899489e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.899489e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.361559 sec
+TOTAL       :     3.470320 sec
 INFO: No Floating Point Exceptions have been reported
-     7,684,713,240      cycles                           #    2.281 GHz                    
-    14,381,480,169      instructions                     #    1.87  insn per cycle         
-       3.370756572 seconds time elapsed
+     7,569,596,039      cycles                           #    2.178 GHz                    
+    14,278,276,260      instructions                     #    1.89  insn per cycle         
+       3.476115796 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1019) (512y:  164) (512z:  876)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inlL_hrd0.txt
new file mode 100644
index 0000000000..f981434028
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inlL_hrd0.txt
@@ -0,0 +1,229 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 13s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+DATE: 2024-08-30_00:39:00
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inlL_hrd0/check_cuda.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 5.587429e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.989450e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.328569e+08                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     0.713162 sec
+INFO: No Floating Point Exceptions have been reported
+     2,693,494,366      cycles                           #    2.844 GHz                    
+     4,177,227,042      instructions                     #    1.55  insn per cycle         
+       1.006179245 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inlL_hrd0/check_cuda.exe -p 2048 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 188
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 1.282804e-02
+Avg ME (F77/GPU)   = 1.2828039868165201E-002
+Relative difference = 1.0277080522138477e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 9.731290e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.123870e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.123870e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     6.877047 sec
+INFO: No Floating Point Exceptions have been reported
+    20,081,365,409      cycles                           #    2.918 GHz                    
+    50,906,799,397      instructions                     #    2.54  insn per cycle         
+       6.882687044 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  153) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039868164916E-002
+Relative difference = 1.0277102699700292e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.464934e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.869021e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.869021e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     4.709652 sec
+INFO: No Floating Point Exceptions have been reported
+    13,710,824,982      cycles                           #    2.909 GHz                    
+    34,027,875,789      instructions                     #    2.48  insn per cycle         
+       4.715209456 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  460) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039868164916E-002
+Relative difference = 1.0277102699700292e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.901489e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.604068e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.604068e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     3.718431 sec
+INFO: No Floating Point Exceptions have been reported
+    10,366,336,114      cycles                           #    2.785 GHz                    
+    20,235,913,886      instructions                     #    1.95  insn per cycle         
+       3.724013133 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1079) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039868165088E-002
+Relative difference = 1.0277089312025782e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.956806e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.698252e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.698252e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     3.623265 sec
+INFO: No Floating Point Exceptions have been reported
+    10,151,613,903      cycles                           #    2.798 GHz                    
+    20,078,116,551      instructions                     #    1.98  insn per cycle         
+       3.628876915 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  899) (512y:  170) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039868165088E-002
+Relative difference = 1.0277089312025782e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.663404e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.167886e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.167886e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     4.190869 sec
+INFO: No Floating Point Exceptions have been reported
+     8,811,636,276      cycles                           #    2.100 GHz                    
+    16,396,474,245      instructions                     #    1.86  insn per cycle         
+       4.196501706 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  823) (512y:  152) (512z:  608)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039868165088E-002
+Relative difference = 1.0277089312025782e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
index 7cb0226a73..2b79257b9d 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 55s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_19:48:51
+DATE: 2024-08-29_22:46:28
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.527020e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.262134e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.154425e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.371929e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.187596e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.156077e+09                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
-TOTAL       :     0.568660 sec
+TOTAL       :     0.576278 sec
 INFO: No Floating Point Exceptions have been reported
-     2,313,614,099      cycles                           #    2.926 GHz                    
-     3,562,444,599      instructions                     #    1.54  insn per cycle         
-       0.849201094 seconds time elapsed
+     2,286,525,925      cycles                           #    2.859 GHz                    
+     3,613,378,662      instructions                     #    1.58  insn per cycle         
+       0.856826723 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.093483e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.290231e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.290231e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.074323e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.268108e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.268108e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     6.128520 sec
+TOTAL       :     6.228196 sec
 INFO: No Floating Point Exceptions have been reported
-    18,358,884,229      cycles                           #    2.993 GHz                    
-    45,043,610,227      instructions                     #    2.45  insn per cycle         
-       6.135113438 seconds time elapsed
+    18,255,285,954      cycles                           #    2.930 GHz                    
+    45,002,856,271      instructions                     #    2.47  insn per cycle         
+       6.233407509 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  411) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.301890e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.520762e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.520762e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.248332e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.427587e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.427587e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.110805 sec
+TOTAL       :     3.172190 sec
 INFO: No Floating Point Exceptions have been reported
-     9,366,787,669      cycles                           #    3.005 GHz                    
-    22,330,309,821      instructions                     #    2.38  insn per cycle         
-       3.117673303 seconds time elapsed
+     9,271,630,908      cycles                           #    2.919 GHz                    
+    22,288,587,741      instructions                     #    2.40  insn per cycle         
+       3.177416791 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.473210e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.807312e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.807312e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.412454e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.674219e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.674219e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.917892 sec
+TOTAL       :     2.979908 sec
 INFO: No Floating Point Exceptions have been reported
-     8,504,359,827      cycles                           #    2.909 GHz                    
-    15,788,659,527      instructions                     #    1.86  insn per cycle         
-       2.924742872 seconds time elapsed
+     8,365,515,955      cycles                           #    2.803 GHz                    
+    15,745,814,699      instructions                     #    1.88  insn per cycle         
+       2.985184245 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2595) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.503770e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.901448e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.901448e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.459370e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.777330e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.777330e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.886577 sec
+TOTAL       :     2.932621 sec
 INFO: No Floating Point Exceptions have been reported
-     8,412,391,431      cycles                           #    2.908 GHz                    
-    15,643,654,257      instructions                     #    1.86  insn per cycle         
-       2.893387724 seconds time elapsed
+     8,230,915,348      cycles                           #    2.804 GHz                    
+    15,599,118,118      instructions                     #    1.90  insn per cycle         
+       2.937899823 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2500) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.563180e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.953888e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.953888e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.441935e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.730572e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.730572e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.828437 sec
+TOTAL       :     2.949312 sec
 INFO: No Floating Point Exceptions have been reported
-     6,692,094,866      cycles                           #    2.362 GHz                    
-    12,901,049,888      instructions                     #    1.93  insn per cycle         
-       2.834887138 seconds time elapsed
+     6,640,894,201      cycles                           #    2.249 GHz                    
+    12,857,017,252      instructions                     #    1.94  insn per cycle         
+       2.954500755 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1728) (512y:   17) (512z: 1439)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt
index e0350b6b37..5d45f34ad5 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 03s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:17:08
+DATE: 2024-08-29_23:27:41
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.473571e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.655207e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.655207e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.143990e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.691952e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.691952e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371710e-02 +- 3.270389e-06 )  GeV^0
-TOTAL       :     1.648294 sec
+TOTAL       :     1.706255 sec
 INFO: No Floating Point Exceptions have been reported
-     5,601,516,010      cycles                           #    2.985 GHz                    
-    10,167,612,404      instructions                     #    1.82  insn per cycle         
-       1.933877739 seconds time elapsed
+     5,593,795,242      cycles                           #    2.895 GHz                    
+    10,219,614,438      instructions                     #    1.83  insn per cycle         
+       1.989782693 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -91,15 +95,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.085388e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.276616e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.276616e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.053223e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.238616e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.238616e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     6.267894 sec
+TOTAL       :     6.449226 sec
 INFO: No Floating Point Exceptions have been reported
-    18,908,429,443      cycles                           #    3.015 GHz                    
-    45,146,579,440      instructions                     #    2.39  insn per cycle         
-       6.274110345 seconds time elapsed
+    18,857,985,149      cycles                           #    2.922 GHz                    
+    45,146,976,947      instructions                     #    2.39  insn per cycle         
+       6.455362407 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  411) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -121,15 +125,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.203296e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.287244e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.287244e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.149278e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.209808e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.209808e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.346534 sec
+TOTAL       :     3.428675 sec
 INFO: No Floating Point Exceptions have been reported
-    10,054,217,163      cycles                           #    3.000 GHz                    
-    23,624,196,038      instructions                     #    2.35  insn per cycle         
-       3.352720761 seconds time elapsed
+     9,989,397,664      cycles                           #    2.909 GHz                    
+    23,625,198,030      instructions                     #    2.37  insn per cycle         
+       3.434932561 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -151,15 +155,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.355349e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.546206e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.546206e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.297366e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.420681e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.420681e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     3.162857 sec
+TOTAL       :     3.239437 sec
 INFO: No Floating Point Exceptions have been reported
-     9,188,398,792      cycles                           #    2.900 GHz                    
-    16,865,170,162      instructions                     #    1.84  insn per cycle         
-       3.169069798 seconds time elapsed
+     9,084,126,388      cycles                           #    2.800 GHz                    
+    16,865,877,588      instructions                     #    1.86  insn per cycle         
+       3.245743617 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2595) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -181,15 +185,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.385264e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.627916e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.627916e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.345633e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.535854e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.535854e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     3.125444 sec
+TOTAL       :     3.180459 sec
 INFO: No Floating Point Exceptions have been reported
-     9,070,498,443      cycles                           #    2.897 GHz                    
-    16,723,535,304      instructions                     #    1.84  insn per cycle         
-       3.131626525 seconds time elapsed
+     8,968,440,697      cycles                           #    2.815 GHz                    
+    16,723,850,550      instructions                     #    1.86  insn per cycle         
+       3.186623646 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2500) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -211,15 +215,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.403637e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.591618e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.591618e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.312595e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.430024e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.430024e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     3.114765 sec
+TOTAL       :     3.220913 sec
 INFO: No Floating Point Exceptions have been reported
-     7,403,928,752      cycles                           #    2.373 GHz                    
-    14,061,923,411      instructions                     #    1.90  insn per cycle         
-       3.121062730 seconds time elapsed
+     7,368,957,264      cycles                           #    2.284 GHz                    
+    14,062,326,131      instructions                     #    1.91  insn per cycle         
+       3.227188579 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1728) (512y:   17) (512z: 1439)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt
index 134d5790db..9524ab2b35 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 00s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:28:33
+DATE: 2024-08-29_23:39:18
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.369933e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.192240e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.130758e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.369646e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.161551e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.100782e+09                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371863e-02 +- 3.269951e-06 )  GeV^0
-TOTAL       :     1.177651 sec
+TOTAL       :     1.208837 sec
 INFO: No Floating Point Exceptions have been reported
-     4,159,647,361      cycles                           #    2.974 GHz                    
-     6,655,919,197      instructions                     #    1.60  insn per cycle         
-       1.454885517 seconds time elapsed
+     4,108,047,745      cycles                           #    2.875 GHz                    
+     6,574,138,582      instructions                     #    1.60  insn per cycle         
+       1.486544721 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.106596e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.306356e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.306356e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.071530e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.264231e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.264231e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371887e-02 +- 3.270267e-06 )  GeV^0
-TOTAL       :     6.378232 sec
+TOTAL       :     6.589611 sec
 INFO: No Floating Point Exceptions have been reported
-    19,274,317,116      cycles                           #    3.020 GHz                    
-    45,182,791,116      instructions                     #    2.34  insn per cycle         
-       6.383426426 seconds time elapsed
+    19,260,628,477      cycles                           #    2.921 GHz                    
+    45,180,488,610      instructions                     #    2.35  insn per cycle         
+       6.594714994 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  411) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.314732e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.536945e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.536945e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.254234e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.437552e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.437552e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371887e-02 +- 3.270266e-06 )  GeV^0
-TOTAL       :     3.415254 sec
+TOTAL       :     3.508816 sec
 INFO: No Floating Point Exceptions have been reported
-    10,316,548,749      cycles                           #    3.017 GHz                    
-    22,369,828,182      instructions                     #    2.17  insn per cycle         
-       3.420542694 seconds time elapsed
+    10,282,216,303      cycles                           #    2.927 GHz                    
+    22,372,083,981      instructions                     #    2.18  insn per cycle         
+       3.513966837 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.440596e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.750420e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.750420e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.407117e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.666910e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.666910e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371885e-02 +- 3.270112e-06 )  GeV^0
-TOTAL       :     3.274423 sec
+TOTAL       :     3.330770 sec
 INFO: No Floating Point Exceptions have been reported
-     9,443,732,115      cycles                           #    2.881 GHz                    
-    15,660,089,896      instructions                     #    1.66  insn per cycle         
-       3.279649935 seconds time elapsed
+     9,378,233,860      cycles                           #    2.812 GHz                    
+    15,657,056,788      instructions                     #    1.67  insn per cycle         
+       3.335855333 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2595) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.490204e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.861466e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.861466e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.448156e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.779179e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.779179e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371885e-02 +- 3.270112e-06 )  GeV^0
-TOTAL       :     3.226764 sec
+TOTAL       :     3.294832 sec
 INFO: No Floating Point Exceptions have been reported
-     9,373,690,310      cycles                           #    2.901 GHz                    
-    15,311,292,063      instructions                     #    1.63  insn per cycle         
-       3.231783686 seconds time elapsed
+     9,310,523,208      cycles                           #    2.823 GHz                    
+    15,310,125,986      instructions                     #    1.64  insn per cycle         
+       3.300035766 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2500) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.539604e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.891988e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.891988e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.434912e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.707938e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.707938e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371885e-02 +- 3.270112e-06 )  GeV^0
-TOTAL       :     3.181070 sec
+TOTAL       :     3.309853 sec
 INFO: No Floating Point Exceptions have been reported
-     7,641,722,393      cycles                           #    2.399 GHz                    
-    12,564,622,024      instructions                     #    1.64  insn per cycle         
-       3.186357864 seconds time elapsed
+     7,657,370,946      cycles                           #    2.311 GHz                    
+    12,565,328,780      instructions                     #    1.64  insn per cycle         
+       3.314923960 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1728) (512y:   17) (512z: 1439)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt
index 88892aa3af..ffc392dd39 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 01s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:25:48
+DATE: 2024-08-29_23:36:30
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.382651e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.206198e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.156880e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.375499e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.177052e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.131045e+09                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
-TOTAL       :     0.845509 sec
+TOTAL       :     0.860707 sec
 INFO: No Floating Point Exceptions have been reported
-     3,157,288,524      cycles                           #    2.956 GHz                    
-     6,452,716,967      instructions                     #    2.04  insn per cycle         
-       1.124028974 seconds time elapsed
+     3,119,359,049      cycles                           #    2.875 GHz                    
+     6,403,047,459      instructions                     #    2.05  insn per cycle         
+       1.141875008 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.102313e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.299140e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.299140e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.074662e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.267562e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.267562e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     6.067726 sec
+TOTAL       :     6.224213 sec
 INFO: No Floating Point Exceptions have been reported
-    18,241,926,835      cycles                           #    3.004 GHz                    
-    44,997,190,895      instructions                     #    2.47  insn per cycle         
-       6.073021817 seconds time elapsed
+    18,239,367,389      cycles                           #    2.929 GHz                    
+    45,000,984,186      instructions                     #    2.47  insn per cycle         
+       6.229329227 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  411) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.262484e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.452586e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.452586e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.256146e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.437532e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.437532e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.153640 sec
+TOTAL       :     3.162633 sec
 INFO: No Floating Point Exceptions have been reported
-     9,294,014,762      cycles                           #    2.943 GHz                    
-    22,288,953,735      instructions                     #    2.40  insn per cycle         
-       3.158807454 seconds time elapsed
+     9,272,693,132      cycles                           #    2.928 GHz                    
+    22,289,563,655      instructions                     #    2.40  insn per cycle         
+       3.167689272 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.393307e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.660811e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.660811e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.406656e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.671297e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.671297e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     3.002727 sec
+TOTAL       :     2.984968 sec
 INFO: No Floating Point Exceptions have been reported
-     8,431,789,445      cycles                           #    2.804 GHz                    
-    15,745,619,364      instructions                     #    1.87  insn per cycle         
-       3.007966059 seconds time elapsed
+     8,391,394,313      cycles                           #    2.808 GHz                    
+    15,746,990,400      instructions                     #    1.88  insn per cycle         
+       2.990019398 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2595) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.401412e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.704220e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.704220e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.456607e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.788447e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.788447e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.993880 sec
+TOTAL       :     2.931017 sec
 INFO: No Floating Point Exceptions have been reported
-     8,307,647,714      cycles                           #    2.771 GHz                    
-    15,598,428,137      instructions                     #    1.88  insn per cycle         
-       2.998876053 seconds time elapsed
+     8,255,059,737      cycles                           #    2.812 GHz                    
+    15,603,739,815      instructions                     #    1.89  insn per cycle         
+       2.936040895 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2500) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.569189e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.940564e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.940564e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.440909e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.726128e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.726128e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.807856 sec
+TOTAL       :     2.950959 sec
 INFO: No Floating Point Exceptions have been reported
-     6,608,078,812      cycles                           #    2.350 GHz                    
-    12,854,592,970      instructions                     #    1.95  insn per cycle         
-       2.812995127 seconds time elapsed
+     6,644,904,192      cycles                           #    2.249 GHz                    
+    12,855,533,735      instructions                     #    1.93  insn per cycle         
+       2.956304259 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1728) (512y:   17) (512z: 1439)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt
index 9b85e8bca9..6f289e010a 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 00s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:23:04
+DATE: 2024-08-29_23:33:44
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -50,15 +54,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.140303e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.190749e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.050049e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.960198e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.134432e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.015289e+09                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371710e-02 +- 3.270389e-06 )  GeV^0
-TOTAL       :     1.475514 sec
+TOTAL       :     1.499916 sec
 INFO: No Floating Point Exceptions have been reported
-     5,002,845,340      cycles                           #    2.948 GHz                    
-     9,174,343,943      instructions                     #    1.83  insn per cycle         
-       1.753614320 seconds time elapsed
+     4,978,377,712      cycles                           #    2.897 GHz                    
+     9,174,791,316      instructions                     #    1.84  insn per cycle         
+       1.776745570 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
@@ -84,15 +88,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.100425e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.302255e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.302255e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.073628e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.267249e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.267249e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     6.083427 sec
+TOTAL       :     6.228944 sec
 INFO: No Floating Point Exceptions have been reported
-    18,286,986,421      cycles                           #    3.004 GHz                    
-    44,997,971,916      instructions                     #    2.46  insn per cycle         
-       6.088650881 seconds time elapsed
+    18,251,581,479      cycles                           #    2.928 GHz                    
+    44,998,012,023      instructions                     #    2.47  insn per cycle         
+       6.234003939 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  411) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -113,15 +117,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.314534e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.542028e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.542028e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.238225e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.397221e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.397221e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.081783 sec
+TOTAL       :     3.184599 sec
 INFO: No Floating Point Exceptions have been reported
-     9,321,092,178      cycles                           #    3.020 GHz                    
-    22,287,543,522      instructions                     #    2.39  insn per cycle         
-       3.087086590 seconds time elapsed
+     9,283,360,179      cycles                           #    2.911 GHz                    
+    22,288,296,036      instructions                     #    2.40  insn per cycle         
+       3.189871847 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -142,15 +146,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.473883e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.791063e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.791063e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.382997e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.617819e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.617819e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.904887 sec
+TOTAL       :     3.013651 sec
 INFO: No Floating Point Exceptions have been reported
-     8,410,533,055      cycles                           #    2.892 GHz                    
-    15,745,298,993      instructions                     #    1.87  insn per cycle         
-       2.910034115 seconds time elapsed
+     8,377,553,104      cycles                           #    2.776 GHz                    
+    15,745,871,236      instructions                     #    1.88  insn per cycle         
+       3.018832010 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2595) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -171,15 +175,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.505951e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.882287e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.882287e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.460372e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.777278e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.777278e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.874716 sec
+TOTAL       :     2.927250 sec
 INFO: No Floating Point Exceptions have been reported
-     8,289,781,145      cycles                           #    2.880 GHz                    
-    15,603,340,875      instructions                     #    1.88  insn per cycle         
-       2.879926744 seconds time elapsed
+     8,236,102,004      cycles                           #    2.810 GHz                    
+    15,605,409,431      instructions                     #    1.89  insn per cycle         
+       2.932355101 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2500) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.541059e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.907885e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.907885e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.420493e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.701429e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.701429e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.838024 sec
+TOTAL       :     2.973248 sec
 INFO: No Floating Point Exceptions have been reported
-     6,642,493,654      cycles                           #    2.337 GHz                    
-    12,855,006,533      instructions                     #    1.94  insn per cycle         
-       2.843273121 seconds time elapsed
+     6,660,400,903      cycles                           #    2.237 GHz                    
+    12,857,113,210      instructions                     #    1.93  insn per cycle         
+       2.978399312 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1728) (512y:   17) (512z: 1439)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt
index 1d6c5eac35..f29bf7a852 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 49s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_19:49:17
+DATE: 2024-08-29_22:46:54
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.538728e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.270981e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.213583e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.369611e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.203420e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.201295e+09                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
-TOTAL       :     0.564431 sec
+TOTAL       :     0.574209 sec
 INFO: No Floating Point Exceptions have been reported
-     2,335,295,476      cycles                           #    2.965 GHz                    
-     3,628,047,058      instructions                     #    1.55  insn per cycle         
-       0.844723791 seconds time elapsed
+     2,289,523,558      cycles                           #    2.867 GHz                    
+     3,616,808,454      instructions                     #    1.58  insn per cycle         
+       0.854990957 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.105961e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.305064e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.305064e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.071743e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.267243e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.267243e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     6.061656 sec
+TOTAL       :     6.239937 sec
 INFO: No Floating Point Exceptions have been reported
-    18,285,648,193      cycles                           #    3.014 GHz                    
-    45,012,181,796      instructions                     #    2.46  insn per cycle         
-       6.068344943 seconds time elapsed
+    18,251,320,949      cycles                           #    2.923 GHz                    
+    44,972,609,752      instructions                     #    2.46  insn per cycle         
+       6.244989271 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  397) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.291804e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.489005e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.489005e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.244827e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.409474e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.409474e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.124490 sec
+TOTAL       :     3.175872 sec
 INFO: No Floating Point Exceptions have been reported
-     9,410,134,292      cycles                           #    3.006 GHz                    
-    22,303,224,878      instructions                     #    2.37  insn per cycle         
-       3.131481201 seconds time elapsed
+     9,283,967,387      cycles                           #    2.919 GHz                    
+    22,256,299,457      instructions                     #    2.40  insn per cycle         
+       3.181052612 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.475997e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.815316e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.815316e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.411886e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.680552e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.680552e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.909295 sec
+TOTAL       :     2.977280 sec
 INFO: No Floating Point Exceptions have been reported
-     8,493,085,415      cycles                           #    2.913 GHz                    
-    15,781,425,735      instructions                     #    1.86  insn per cycle         
-       2.916002973 seconds time elapsed
+     8,365,626,441      cycles                           #    2.806 GHz                    
+    15,740,277,047      instructions                     #    1.88  insn per cycle         
+       2.982795229 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2570) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.513335e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.913286e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.913286e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.432222e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.720588e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.720588e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.878218 sec
+TOTAL       :     2.961844 sec
 INFO: No Floating Point Exceptions have been reported
-     8,394,171,701      cycles                           #    2.911 GHz                    
-    15,627,283,272      instructions                     #    1.86  insn per cycle         
-       2.884835196 seconds time elapsed
+     8,238,479,740      cycles                           #    2.778 GHz                    
+    15,586,179,769      instructions                     #    1.89  insn per cycle         
+       2.966991606 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2469) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.564665e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.956343e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.956343e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.428069e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.704769e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.704769e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.826301 sec
+TOTAL       :     2.963513 sec
 INFO: No Floating Point Exceptions have been reported
-     6,645,156,055      cycles                           #    2.346 GHz                    
-    12,878,593,303      instructions                     #    1.94  insn per cycle         
-       2.832875887 seconds time elapsed
+     6,611,440,096      cycles                           #    2.228 GHz                    
+    12,834,325,416      instructions                     #    1.94  insn per cycle         
+       2.968824892 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1698) (512y:   18) (512z: 1427)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt
index 2b62892e6a..3ba49685fe 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 30s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:08:10
+DATE: 2024-08-29_23:18:27
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.451320e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.231819e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.130769e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.296010e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.154221e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.150279e+09                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
-TOTAL       :     0.567390 sec
+TOTAL       :     0.584441 sec
 INFO: No Floating Point Exceptions have been reported
-     2,325,688,868      cycles                           #    2.936 GHz                    
-     3,579,904,434      instructions                     #    1.54  insn per cycle         
-       0.848470717 seconds time elapsed
+     2,320,419,824      cycles                           #    2.868 GHz                    
+     3,634,414,746      instructions                     #    1.57  insn per cycle         
+       0.867556691 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.665768e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.163815e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.163815e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.632172e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.125712e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.125712e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     4.146283 sec
+TOTAL       :     4.228730 sec
 INFO: No Floating Point Exceptions have been reported
-    12,236,614,644      cycles                           #    2.947 GHz                    
-    32,269,366,728      instructions                     #    2.64  insn per cycle         
-       4.152494891 seconds time elapsed
+    12,184,938,847      cycles                           #    2.878 GHz                    
+    32,237,474,232      instructions                     #    2.65  insn per cycle         
+       4.234313345 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  290) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.716868e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.596230e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.596230e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.666121e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.502284e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.502284e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     2.692634 sec
+TOTAL       :     2.736132 sec
 INFO: No Floating Point Exceptions have been reported
-     8,040,413,978      cycles                           #    2.980 GHz                    
-    18,731,295,679      instructions                     #    2.33  insn per cycle         
-       2.699009464 seconds time elapsed
+     8,001,531,537      cycles                           #    2.920 GHz                    
+    18,696,819,081      instructions                     #    2.34  insn per cycle         
+       2.741605566 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1548) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.823808e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.734147e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.734147e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.780005e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.594822e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.594822e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.599488 sec
+TOTAL       :     2.634151 sec
 INFO: No Floating Point Exceptions have been reported
-     7,529,267,846      cycles                           #    2.890 GHz                    
-    14,278,306,013      instructions                     #    1.90  insn per cycle         
-       2.606005161 seconds time elapsed
+     7,439,358,965      cycles                           #    2.819 GHz                    
+    14,242,045,254      instructions                     #    1.91  insn per cycle         
+       2.639862200 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2222) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.881055e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.928068e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.928068e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.837633e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.742018e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.742018e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.551515 sec
+TOTAL       :     2.586770 sec
 INFO: No Floating Point Exceptions have been reported
-     7,444,338,967      cycles                           #    2.911 GHz                    
-    13,969,219,259      instructions                     #    1.88  insn per cycle         
-       2.557876734 seconds time elapsed
+     7,296,042,539      cycles                           #    2.815 GHz                    
+    13,934,395,120      instructions                     #    1.91  insn per cycle         
+       2.592340781 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2074) (512y:    3) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.593244e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.031185e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.031185e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.455305e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.802315e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.802315e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.800385 sec
+TOTAL       :     2.939990 sec
 INFO: No Floating Point Exceptions have been reported
-     6,564,002,113      cycles                           #    2.339 GHz                    
-    13,450,088,279      instructions                     #    2.05  insn per cycle         
-       2.806913095 seconds time elapsed
+     6,572,892,326      cycles                           #    2.232 GHz                    
+    13,414,317,144      instructions                     #    2.04  insn per cycle         
+       2.945482478 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2056) (512y:    1) (512z: 1197)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt
index 5ae8d74446..3bb46f222b 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 30s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_20:08:33
+DATE: 2024-08-29_23:18:51
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.456866e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.267705e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.218590e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.296037e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.166988e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.207168e+09                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
-TOTAL       :     0.568736 sec
+TOTAL       :     0.580765 sec
 INFO: No Floating Point Exceptions have been reported
-     2,333,386,939      cycles                           #    2.946 GHz                    
-     3,651,568,314      instructions                     #    1.56  insn per cycle         
-       0.849375970 seconds time elapsed
+     2,332,012,513      cycles                           #    2.872 GHz                    
+     3,668,769,216      instructions                     #    1.57  insn per cycle         
+       0.868538079 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.283106e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.333262e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.333262e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.207934e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.220486e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.220486e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.121614 sec
+TOTAL       :     3.220883 sec
 INFO: No Floating Point Exceptions have been reported
-     9,386,181,268      cycles                           #    3.002 GHz                    
-    25,683,181,247      instructions                     #    2.74  insn per cycle         
-       3.127889698 seconds time elapsed
+     9,370,617,522      cycles                           #    2.905 GHz                    
+    25,652,019,955      instructions                     #    2.74  insn per cycle         
+       3.226474616 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  243) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.093996e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.729930e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.729930e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.997177e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.535513e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.535513e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     2.404675 sec
+TOTAL       :     2.476192 sec
 INFO: No Floating Point Exceptions have been reported
-     7,273,765,849      cycles                           #    3.018 GHz                    
-    16,902,173,009      instructions                     #    2.32  insn per cycle         
-       2.411177480 seconds time elapsed
+     7,229,355,154      cycles                           #    2.914 GHz                    
+    16,867,444,320      instructions                     #    2.33  insn per cycle         
+       2.481841403 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1350) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.955814e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.106638e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.106638e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.936522e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.033228e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.033228e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.499207 sec
+TOTAL       :     2.509294 sec
 INFO: No Floating Point Exceptions have been reported
-     7,265,897,672      cycles                           #    2.902 GHz                    
-    13,654,744,957      instructions                     #    1.88  insn per cycle         
-       2.505830767 seconds time elapsed
+     7,122,962,556      cycles                           #    2.833 GHz                    
+    13,619,646,135      instructions                     #    1.91  insn per cycle         
+       2.514911103 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2046) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.024505e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.340418e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.340418e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.973129e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.132874e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.132874e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.448205 sec
+TOTAL       :     2.485889 sec
 INFO: No Floating Point Exceptions have been reported
-     7,137,327,072      cycles                           #    2.909 GHz                    
-    13,455,725,408      instructions                     #    1.89  insn per cycle         
-       2.454335523 seconds time elapsed
+     7,022,473,336      cycles                           #    2.819 GHz                    
+    13,427,556,193      instructions                     #    1.91  insn per cycle         
+       2.491467170 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1927) (512y:    4) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.717556e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.328622e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.328622e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.610878e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.134667e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.134667e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.693340 sec
+TOTAL       :     2.785068 sec
 INFO: No Floating Point Exceptions have been reported
-     6,390,724,476      cycles                           #    2.368 GHz                    
-    13,180,968,753      instructions                     #    2.06  insn per cycle         
-       2.699833523 seconds time elapsed
+     6,337,623,889      cycles                           #    2.272 GHz                    
+    13,143,208,474      instructions                     #    2.07  insn per cycle         
+       2.790716425 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2012) (512y:    1) (512z: 1083)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inlL_hrd0.txt
new file mode 100644
index 0000000000..169ffacd73
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inlL_hrd0.txt
@@ -0,0 +1,229 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 27s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+DATE: 2024-08-30_00:39:31
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inlL_hrd0/check_cuda.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.068270e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.008280e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.777839e+08                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
+TOTAL       :     0.596958 sec
+INFO: No Floating Point Exceptions have been reported
+     2,370,244,498      cycles                           #    2.860 GHz                    
+     3,714,655,802      instructions                     #    1.57  insn per cycle         
+       0.886666797 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inlL_hrd0/check_cuda.exe -p 2048 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 93
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 1.282802e-02
+Avg ME (F77/GPU)   = 1.2828112125134794E-002
+Relative difference = 7.1815552823662555e-06
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.000152e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.166022e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.166022e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
+TOTAL       :     6.662344 sec
+INFO: No Floating Point Exceptions have been reported
+    19,537,141,659      cycles                           #    2.931 GHz                    
+    49,881,468,916      instructions                     #    2.55  insn per cycle         
+       6.667681948 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  150) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039854866802E-002
+Relative difference = 1.1313746984080878e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 2.158239e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.215912e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.215912e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
+TOTAL       :     3.287107 sec
+INFO: No Floating Point Exceptions have been reported
+     9,622,306,960      cycles                           #    2.923 GHz                    
+    23,527,780,343      instructions                     #    2.45  insn per cycle         
+       3.292352681 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  735) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039280066150E-002
+Relative difference = 5.612189004572479e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 2.323781e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.495212e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.495212e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
+TOTAL       :     3.076132 sec
+INFO: No Floating Point Exceptions have been reported
+     8,631,164,800      cycles                           #    2.803 GHz                    
+    16,074,797,975      instructions                     #    1.86  insn per cycle         
+       3.081542467 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1721) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282805e-02
+Avg ME (F77/C++)    = 1.2828053255361738E-002
+Relative difference = 2.5376902468575066e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 2.384108e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.605806e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.605806e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
+TOTAL       :     3.010447 sec
+INFO: No Floating Point Exceptions have been reported
+     8,476,044,460      cycles                           #    2.811 GHz                    
+    15,998,699,950      instructions                     #    1.89  insn per cycle         
+       3.015791389 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1715) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282805e-02
+Avg ME (F77/C++)    = 1.2828053255361738E-002
+Relative difference = 2.5376902468575066e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 2.337724e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.538733e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.538733e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
+TOTAL       :     3.066004 sec
+INFO: No Floating Point Exceptions have been reported
+     6,850,691,176      cycles                           #    2.231 GHz                    
+    13,121,482,744      instructions                     #    1.92  insn per cycle         
+       3.071334263 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1697) (512y:   12) (512z:  752)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282805e-02
+Avg ME (F77/C++)    = 1.2828052575059701E-002
+Relative difference = 2.0073664354238512e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
index dec1886a20..28937a024f 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 55s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_19:49:43
+DATE: 2024-08-29_22:47:21
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.471546e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.855416e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.166311e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.818500e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.703571e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.142047e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.664565 sec
+TOTAL       :     0.680200 sec
 INFO: No Floating Point Exceptions have been reported
-     2,673,452,306      cycles                           #    2.953 GHz                    
-     4,096,581,433      instructions                     #    1.53  insn per cycle         
-       0.967198892 seconds time elapsed
+     2,612,649,294      cycles                           #    2.867 GHz                    
+     4,020,862,265      instructions                     #    1.54  insn per cycle         
+       0.973408337 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.042304e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.212707e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.212707e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.013555e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.178119e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.178119e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.467559 sec
+TOTAL       :     6.618295 sec
 INFO: No Floating Point Exceptions have been reported
-    19,491,750,695      cycles                           #    3.010 GHz                    
-    46,366,168,986      instructions                     #    2.38  insn per cycle         
-       6.476541865 seconds time elapsed
+    19,347,235,556      cycles                           #    2.921 GHz                    
+    46,262,872,265      instructions                     #    2.39  insn per cycle         
+       6.623644316 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  466) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.662736e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.194123e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.194123e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.622765e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.131585e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.131585e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.219503 sec
+TOTAL       :     4.292508 sec
 INFO: No Floating Point Exceptions have been reported
-    12,706,673,121      cycles                           #    3.006 GHz                    
-    31,586,088,348      instructions                     #    2.49  insn per cycle         
-       4.228514763 seconds time elapsed
+    12,523,530,255      cycles                           #    2.914 GHz                    
+    31,480,155,703      instructions                     #    2.51  insn per cycle         
+       4.298007948 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1720) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.015466e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.812156e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.812156e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.975666e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.736981e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.736981e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.548784 sec
+TOTAL       :     3.591492 sec
 INFO: No Floating Point Exceptions have been reported
-    10,222,806,702      cycles                           #    2.874 GHz                    
-    19,575,907,459      instructions                     #    1.91  insn per cycle         
-       3.557713338 seconds time elapsed
+    10,042,173,345      cycles                           #    2.793 GHz                    
+    19,471,578,015      instructions                     #    1.94  insn per cycle         
+       3.596768981 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2123) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.051557e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.890469e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.890469e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.012657e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.805102e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.805102e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.498884 sec
+TOTAL       :     3.533521 sec
 INFO: No Floating Point Exceptions have been reported
-    10,092,991,859      cycles                           #    2.879 GHz                    
-    19,324,671,897      instructions                     #    1.91  insn per cycle         
-       3.507900575 seconds time elapsed
+     9,885,722,561      cycles                           #    2.794 GHz                    
+    19,217,935,631      instructions                     #    1.94  insn per cycle         
+       3.538804173 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1866) (512y:  189) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.882298e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.563573e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.563573e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.788615e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.387860e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.387860e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.772337 sec
+TOTAL       :     3.929809 sec
 INFO: No Floating Point Exceptions have been reported
-     8,566,798,073      cycles                           #    2.266 GHz                    
-    15,161,524,534      instructions                     #    1.77  insn per cycle         
-       3.781171342 seconds time elapsed
+     8,344,069,760      cycles                           #    2.121 GHz                    
+    15,055,063,349      instructions                     #    1.80  insn per cycle         
+       3.935283980 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1044) (512y:  154) (512z: 1321)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt
index e7689b72e7..5d9a67393b 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 49s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-08_19:50:13
+DATE: 2024-08-29_22:47:51
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.539005e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.550707e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.172141e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.811602e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.711473e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.162788e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.661474 sec
+TOTAL       :     0.682237 sec
 INFO: No Floating Point Exceptions have been reported
-     2,649,580,670      cycles                           #    2.965 GHz                    
-     4,041,332,680      instructions                     #    1.53  insn per cycle         
-       0.953046472 seconds time elapsed
+     2,620,474,570      cycles                           #    2.866 GHz                    
+     4,036,897,619      instructions                     #    1.54  insn per cycle         
+       0.975308933 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.034608e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.202440e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.202440e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.011729e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.174976e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.174976e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.513220 sec
+TOTAL       :     6.629920 sec
 INFO: No Floating Point Exceptions have been reported
-    19,609,702,737      cycles                           #    3.007 GHz                    
-    46,307,035,647      instructions                     #    2.36  insn per cycle         
-       6.522463944 seconds time elapsed
+    19,385,164,241      cycles                           #    2.922 GHz                    
+    46,202,402,296      instructions                     #    2.38  insn per cycle         
+       6.635438882 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  453) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.657659e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.187172e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.187172e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.611858e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.122967e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.122967e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.231188 sec
+TOTAL       :     4.319506 sec
 INFO: No Floating Point Exceptions have been reported
-    12,732,843,853      cycles                           #    3.004 GHz                    
-    31,560,321,434      instructions                     #    2.48  insn per cycle         
-       4.240067788 seconds time elapsed
+    12,604,008,704      cycles                           #    2.915 GHz                    
+    31,454,601,193      instructions                     #    2.50  insn per cycle         
+       4.324876632 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1712) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.029457e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.843800e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.843800e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.970707e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.735506e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.735506e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.528354 sec
+TOTAL       :     3.603419 sec
 INFO: No Floating Point Exceptions have been reported
-    10,258,124,960      cycles                           #    2.901 GHz                    
-    19,565,249,837      instructions                     #    1.91  insn per cycle         
-       3.537275385 seconds time elapsed
+    10,044,338,252      cycles                           #    2.785 GHz                    
+    19,460,257,886      instructions                     #    1.94  insn per cycle         
+       3.608852495 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2107) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.049544e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.886035e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.886035e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.982213e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.748665e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.748665e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.497781 sec
+TOTAL       :     3.584722 sec
 INFO: No Floating Point Exceptions have been reported
-    10,124,826,634      cycles                           #    2.887 GHz                    
-    19,390,299,312      instructions                     #    1.92  insn per cycle         
-       3.507669206 seconds time elapsed
+     9,883,464,828      cycles                           #    2.754 GHz                    
+    19,283,001,699      instructions                     #    1.95  insn per cycle         
+       3.590131099 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1860) (512y:  189) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.905533e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.593731e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.593731e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.814525e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.432144e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.432144e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.733502 sec
+TOTAL       :     3.878171 sec
 INFO: No Floating Point Exceptions have been reported
-     8,422,503,642      cycles                           #    2.251 GHz                    
-    15,074,129,788      instructions                     #    1.79  insn per cycle         
-       3.742530520 seconds time elapsed
+     8,218,463,055      cycles                           #    2.117 GHz                    
+    14,967,397,857      instructions                     #    1.82  insn per cycle         
+       3.883633012 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1037) (512y:  156) (512z: 1305)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inlL_hrd0.txt
new file mode 100644
index 0000000000..ec04bbfea8
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inlL_hrd0.txt
@@ -0,0 +1,229 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 27s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+
+DATE: 2024-08-30_00:39:59
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inlL_hrd0/check_cuda.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 5.610302e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.991689e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.331866e+08                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     0.708255 sec
+INFO: No Floating Point Exceptions have been reported
+     2,692,301,938      cycles                           #    2.850 GHz                    
+     4,257,089,204      instructions                     #    1.58  insn per cycle         
+       1.001838476 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inlL_hrd0/check_cuda.exe -p 2048 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 188
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 1.282804e-02
+Avg ME (F77/GPU)   = 1.2828039901590279E-002
+Relative difference = 7.671454200650844e-09
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 9.523704e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.097109e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.097109e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     7.018377 sec
+INFO: No Floating Point Exceptions have been reported
+    20,491,530,301      cycles                           #    2.918 GHz                    
+    51,086,991,875      instructions                     #    2.49  insn per cycle         
+       7.023870200 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  154) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039952548879E-002
+Relative difference = 3.6990156841838714e-09
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.525942e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.971250e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.971250e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     4.539759 sec
+INFO: No Floating Point Exceptions have been reported
+    13,293,505,034      cycles                           #    2.925 GHz                    
+    33,833,934,284      instructions                     #    2.55  insn per cycle         
+       4.545328210 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  534) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039952548879E-002
+Relative difference = 3.6990156841838714e-09
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.880529e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.563417e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.563417e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     3.756514 sec
+INFO: No Floating Point Exceptions have been reported
+    10,394,658,431      cycles                           #    2.764 GHz                    
+    20,100,825,587      instructions                     #    1.93  insn per cycle         
+       3.762161970 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1266) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039951670679E-002
+Relative difference = 3.767475112924841e-09
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.950646e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.679844e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.679844e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     3.634653 sec
+INFO: No Floating Point Exceptions have been reported
+    10,167,754,453      cycles                           #    2.794 GHz                    
+    20,007,924,479      instructions                     #    1.97  insn per cycle         
+       3.640231167 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1099) (512y:  174) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039951670679E-002
+Relative difference = 3.767475112924841e-09
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inlL_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.685648e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.213687e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.213687e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
+TOTAL       :     4.142873 sec
+INFO: No Floating Point Exceptions have been reported
+     8,748,202,673      cycles                           #    2.109 GHz                    
+    15,970,372,239      instructions                     #    1.83  insn per cycle         
+       4.148444048 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1079) (512y:  164) (512z:  695)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.282804e-02
+Avg ME (F77/C++)    = 1.2828039951670679E-002
+Relative difference = 3.767475112924841e-09
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
index 9114f2e8bd..bf281d75b3 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
@@ -1,6 +1,6 @@
 
 ------------------------------------------------
-Preliminary build completed in 0d 00h 00m 25s
+Preliminary build completed in 0d 00h 00m 26s
 ------------------------------------------------
 
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -44,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-28_14:28:14
+DATE: 2024-08-29_22:48:22
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.589473e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.164485e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.280951e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.725749e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.169025e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.280824e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.528239 sec
+TOTAL       :     0.527233 sec
 INFO: No Floating Point Exceptions have been reported
-     2,222,057,027      cycles                           #    2.887 GHz                    
-     3,171,868,018      instructions                     #    1.43  insn per cycle         
-       0.826440817 seconds time elapsed
+     2,173,008,163      cycles                           #    2.867 GHz                    
+     3,142,274,275      instructions                     #    1.45  insn per cycle         
+       0.816679974 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -86,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.823929e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.870912e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.870912e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.816184e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.863096e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.863096e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.856817 sec
+TOTAL       :     5.880537 sec
 INFO: No Floating Point Exceptions have been reported
-    17,161,452,897      cycles                           #    2.928 GHz                    
-    45,937,120,419      instructions                     #    2.68  insn per cycle         
-       5.862725573 seconds time elapsed
+    17,173,522,079      cycles                           #    2.918 GHz                    
+    45,940,447,501      instructions                     #    2.68  insn per cycle         
+       5.886318114 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -115,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.126989e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.279897e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.279897e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.164036e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.319146e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.319146e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.462372 sec
+TOTAL       :     3.420599 sec
 INFO: No Floating Point Exceptions have been reported
-    10,017,721,294      cycles                           #    2.889 GHz                    
-    27,835,306,533      instructions                     #    2.78  insn per cycle         
-       3.467900747 seconds time elapsed
+    10,005,630,694      cycles                           #    2.921 GHz                    
+    27,835,659,377      instructions                     #    2.78  insn per cycle         
+       3.426171105 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -144,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.028989e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.416165e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.416165e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.985768e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.359051e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.359051e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.192265 sec
+TOTAL       :     2.209915 sec
 INFO: No Floating Point Exceptions have been reported
-     6,072,828,199      cycles                           #    2.764 GHz                    
-    12,576,463,194      instructions                     #    2.07  insn per cycle         
-       2.197922075 seconds time elapsed
+     6,062,740,815      cycles                           #    2.738 GHz                    
+    12,577,042,076      instructions                     #    2.07  insn per cycle         
+       2.215350137 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -173,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.485067e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.946458e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.946458e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.474101e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.932150e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.932150e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.018436 sec
+TOTAL       :     2.022046 sec
 INFO: No Floating Point Exceptions have been reported
-     5,586,234,830      cycles                           #    2.761 GHz                    
-    12,014,178,237      instructions                     #    2.15  insn per cycle         
-       2.024082473 seconds time elapsed
+     5,565,418,035      cycles                           #    2.746 GHz                    
+    12,015,644,834      instructions                     #    2.16  insn per cycle         
+       2.027619027 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -202,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.521988e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.705669e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.705669e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.500041e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.680913e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.680913e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.084504 sec
+TOTAL       :     3.102703 sec
 INFO: No Floating Point Exceptions have been reported
-     5,693,114,796      cycles                           #    1.843 GHz                    
-     8,291,693,937      instructions                     #    1.46  insn per cycle         
-       3.090277932 seconds time elapsed
+     5,686,124,170      cycles                           #    1.830 GHz                    
+     8,290,168,560      instructions                     #    1.46  insn per cycle         
+       3.108444392 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt
index 20904d51fd..3c6e1bbc89 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 14s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:17:37
+DATE: 2024-08-29_23:28:10
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.670983e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.294260e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.294260e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.468536e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.988607e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.988607e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.801021 sec
+TOTAL       :     0.819103 sec
 INFO: No Floating Point Exceptions have been reported
-     3,080,158,706      cycles                           #    2.935 GHz                    
-     4,797,683,266      instructions                     #    1.56  insn per cycle         
-       1.107754362 seconds time elapsed
+     3,040,074,436      cycles                           #    2.872 GHz                    
+     4,744,943,802      instructions                     #    1.56  insn per cycle         
+       1.117026479 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -91,15 +95,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.860613e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.909257e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.909257e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.813891e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.859845e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.859845e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.862697 sec
+TOTAL       :     5.965795 sec
 INFO: No Floating Point Exceptions have been reported
-    17,649,346,443      cycles                           #    3.005 GHz                    
-    46,130,000,854      instructions                     #    2.61  insn per cycle         
-       5.874952134 seconds time elapsed
+    17,492,314,151      cycles                           #    2.929 GHz                    
+    46,000,593,104      instructions                     #    2.63  insn per cycle         
+       5.972479390 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -121,15 +125,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.216658e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.372905e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.372905e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.138423e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.289757e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.289757e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.488934 sec
+TOTAL       :     3.527164 sec
 INFO: No Floating Point Exceptions have been reported
-    10,528,637,782      cycles                           #    3.008 GHz                    
-    28,161,635,226      instructions                     #    2.67  insn per cycle         
-       3.501603953 seconds time elapsed
+    10,339,180,979      cycles                           #    2.927 GHz                    
+    28,019,312,732      instructions                     #    2.71  insn per cycle         
+       3.533781604 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -151,15 +155,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.020861e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.404928e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.404928e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.894274e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.257355e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.257355e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.319862 sec
+TOTAL       :     2.331879 sec
 INFO: No Floating Point Exceptions have been reported
-     6,615,013,287      cycles                           #    2.835 GHz                    
-    13,014,509,842      instructions                     #    1.97  insn per cycle         
-       2.334044597 seconds time elapsed
+     6,416,928,462      cycles                           #    2.745 GHz                    
+    12,863,992,862      instructions                     #    2.00  insn per cycle         
+       2.338617466 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -181,15 +185,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.540790e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.009639e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.009639e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.396753e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.845240e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.845240e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.122719 sec
+TOTAL       :     2.129662 sec
 INFO: No Floating Point Exceptions have been reported
-     6,074,435,637      cycles                           #    2.845 GHz                    
-    12,446,562,239      instructions                     #    2.05  insn per cycle         
-       2.135603783 seconds time elapsed
+     5,897,393,565      cycles                           #    2.762 GHz                    
+    12,300,032,879      instructions                     #    2.09  insn per cycle         
+       2.136373131 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -211,15 +215,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.615591e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.807268e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.807268e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.467289e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.644923e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.644923e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.133757 sec
+TOTAL       :     3.212063 sec
 INFO: No Floating Point Exceptions have been reported
-     6,213,946,932      cycles                           #    1.975 GHz                    
-     8,678,322,888      instructions                     #    1.40  insn per cycle         
-       3.146596624 seconds time elapsed
+     6,045,170,156      cycles                           #    1.879 GHz                    
+     8,535,674,778      instructions                     #    1.41  insn per cycle         
+       3.218783444 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt
index 278ba4b157..444ccae050 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 01s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:29:01
+DATE: 2024-08-29_23:39:47
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.861886e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.169373e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.276724e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.724255e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.167416e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.278497e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     0.622862 sec
+TOTAL       :     0.623921 sec
 INFO: No Floating Point Exceptions have been reported
-     2,496,588,832      cycles                           #    2.937 GHz                    
-     3,616,944,645      instructions                     #    1.45  insn per cycle         
-       0.908999824 seconds time elapsed
+     2,441,535,187      cycles                           #    2.863 GHz                    
+     3,583,960,573      instructions                     #    1.47  insn per cycle         
+       0.909280119 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.858770e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.906877e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.906877e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.823676e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.870580e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.870580e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     5.824941 sec
+TOTAL       :     5.917583 sec
 INFO: No Floating Point Exceptions have been reported
-    17,438,858,484      cycles                           #    2.991 GHz                    
-    46,011,567,715      instructions                     #    2.64  insn per cycle         
-       5.831016559 seconds time elapsed
+    17,344,273,898      cycles                           #    2.929 GHz                    
+    45,956,921,757      instructions                     #    2.65  insn per cycle         
+       5.922963627 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.238383e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.396939e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.396939e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.157376e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.311663e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.311663e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     3.423850 sec
+TOTAL       :     3.489223 sec
 INFO: No Floating Point Exceptions have been reported
-    10,272,842,406      cycles                           #    2.996 GHz                    
-    27,901,302,334      instructions                     #    2.72  insn per cycle         
-       3.429671541 seconds time elapsed
+    10,185,833,098      cycles                           #    2.916 GHz                    
+    27,834,944,277      instructions                     #    2.73  insn per cycle         
+       3.494676127 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.121821e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.516246e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.516246e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.993976e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.367912e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.367912e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     2.235299 sec
+TOTAL       :     2.267251 sec
 INFO: No Floating Point Exceptions have been reported
-     6,354,923,604      cycles                           #    2.835 GHz                    
-    12,634,246,195      instructions                     #    1.99  insn per cycle         
-       2.242096681 seconds time elapsed
+     6,259,034,691      cycles                           #    2.755 GHz                    
+    12,559,540,754      instructions                     #    2.01  insn per cycle         
+       2.272735159 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.585808e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.053603e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.053603e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.467063e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.922468e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.922468e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     2.059756 sec
+TOTAL       :     2.086090 sec
 INFO: No Floating Point Exceptions have been reported
-     5,815,690,450      cycles                           #    2.817 GHz                    
-    12,015,299,257      instructions                     #    2.07  insn per cycle         
-       2.065558377 seconds time elapsed
+     5,749,773,653      cycles                           #    2.750 GHz                    
+    11,964,142,779      instructions                     #    2.08  insn per cycle         
+       2.091530563 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.643854e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.839235e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.839235e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.501169e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.680263e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.680263e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     3.061355 sec
+TOTAL       :     3.164368 sec
 INFO: No Floating Point Exceptions have been reported
-     5,933,052,882      cycles                           #    1.935 GHz                    
-     8,290,148,322      instructions                     #    1.40  insn per cycle         
-       3.067159573 seconds time elapsed
+     5,872,294,086      cycles                           #    1.853 GHz                    
+     8,241,399,099      instructions                     #    1.40  insn per cycle         
+       3.169920538 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt
index fba3b57280..811ce8dae0 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 00s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:26:14
+DATE: 2024-08-29_23:36:57
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.905617e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.179466e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.279851e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.722197e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.167128e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.278372e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.555994 sec
+TOTAL       :     0.563802 sec
 INFO: No Floating Point Exceptions have been reported
-     2,284,248,162      cycles                           #    2.910 GHz                    
-     3,522,733,929      instructions                     #    1.54  insn per cycle         
-       0.842109172 seconds time elapsed
+     2,262,716,835      cycles                           #    2.865 GHz                    
+     3,565,491,409      instructions                     #    1.58  insn per cycle         
+       0.848472383 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.864505e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.911828e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.911828e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.813802e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.860264e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.860264e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.728269 sec
+TOTAL       :     5.887032 sec
 INFO: No Floating Point Exceptions have been reported
-    17,201,286,704      cycles                           #    3.001 GHz                    
-    45,937,216,481      instructions                     #    2.67  insn per cycle         
-       5.733811627 seconds time elapsed
+    17,158,565,283      cycles                           #    2.913 GHz                    
+    45,937,142,543      instructions                     #    2.68  insn per cycle         
+       5.892364010 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.250062e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.410672e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.410672e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.164067e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.318693e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.318693e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.334295 sec
+TOTAL       :     3.421238 sec
 INFO: No Floating Point Exceptions have been reported
-    10,038,224,892      cycles                           #    3.006 GHz                    
-    27,841,209,673      instructions                     #    2.77  insn per cycle         
-       3.340129450 seconds time elapsed
+    10,010,510,882      cycles                           #    2.922 GHz                    
+    27,835,852,414      instructions                     #    2.78  insn per cycle         
+       3.426638440 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.145160e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.541205e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.541205e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.987212e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.363161e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.363161e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.147149 sec
+TOTAL       :     2.209515 sec
 INFO: No Floating Point Exceptions have been reported
-     6,102,474,947      cycles                           #    2.835 GHz                    
-    12,591,341,324      instructions                     #    2.06  insn per cycle         
-       2.153315340 seconds time elapsed
+     6,080,580,168      cycles                           #    2.747 GHz                    
+    12,577,088,822      instructions                     #    2.07  insn per cycle         
+       2.214907728 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.639021e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.126234e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.126234e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.476364e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.932446e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.932446e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     1.968961 sec
+TOTAL       :     2.020917 sec
 INFO: No Floating Point Exceptions have been reported
-     5,608,749,777      cycles                           #    2.841 GHz                    
-    12,024,185,128      instructions                     #    2.14  insn per cycle         
-       1.975078079 seconds time elapsed
+     5,548,645,581      cycles                           #    2.739 GHz                    
+    12,014,896,755      instructions                     #    2.17  insn per cycle         
+       2.026414780 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.641587e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.834103e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.834103e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.468179e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.645722e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.645722e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.988248 sec
+TOTAL       :     3.129420 sec
 INFO: No Floating Point Exceptions have been reported
-     5,720,578,029      cycles                           #    1.911 GHz                    
-     8,299,459,915      instructions                     #    1.45  insn per cycle         
-       2.994289958 seconds time elapsed
+     5,691,542,603      cycles                           #    1.816 GHz                    
+     8,292,662,387      instructions                     #    1.46  insn per cycle         
+       3.134892980 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt
index 9e3fe4acb0..d3b910a217 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 01s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:23:31
+DATE: 2024-08-29_23:34:11
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -50,15 +54,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.032256e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.173338e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.277454e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.834082e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.166063e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.277877e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.705014 sec
+TOTAL       :     0.710983 sec
 INFO: No Floating Point Exceptions have been reported
-     2,749,776,676      cycles                           #    2.945 GHz                    
-     4,325,337,591      instructions                     #    1.57  insn per cycle         
-       0.991327218 seconds time elapsed
+     2,705,219,930      cycles                           #    2.879 GHz                    
+     4,282,979,112      instructions                     #    1.58  insn per cycle         
+       0.996465503 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
@@ -84,15 +88,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.868158e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.916528e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.916528e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.818437e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.864880e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.864880e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.717662 sec
+TOTAL       :     5.872383 sec
 INFO: No Floating Point Exceptions have been reported
-    17,178,289,091      cycles                           #    3.002 GHz                    
-    45,937,241,973      instructions                     #    2.67  insn per cycle         
-       5.723215350 seconds time elapsed
+    17,160,050,681      cycles                           #    2.920 GHz                    
+    45,940,289,918      instructions                     #    2.68  insn per cycle         
+       5.877757334 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -113,15 +117,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.231136e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.391441e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.391441e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.169677e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.325030e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.325030e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.354044 sec
+TOTAL       :     3.414466 sec
 INFO: No Floating Point Exceptions have been reported
-    10,031,479,526      cycles                           #    2.986 GHz                    
-    27,844,808,096      instructions                     #    2.78  insn per cycle         
-       3.359952965 seconds time elapsed
+    10,011,601,048      cycles                           #    2.928 GHz                    
+    27,837,161,782      instructions                     #    2.78  insn per cycle         
+       3.419970240 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -142,15 +146,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.099162e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.490827e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.490827e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.899817e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.264552e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.264552e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.161987 sec
+TOTAL       :     2.246785 sec
 INFO: No Floating Point Exceptions have been reported
-     6,083,392,852      cycles                           #    2.808 GHz                    
-    12,576,453,088      instructions                     #    2.07  insn per cycle         
-       2.167500908 seconds time elapsed
+     6,167,664,851      cycles                           #    2.739 GHz                    
+    12,576,881,625      instructions                     #    2.04  insn per cycle         
+       2.252386496 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -171,15 +175,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.632481e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.118699e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.118699e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.481532e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.938711e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.938711e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     1.966228 sec
+TOTAL       :     2.019702 sec
 INFO: No Floating Point Exceptions have been reported
-     5,587,261,117      cycles                           #    2.835 GHz                    
-    12,016,452,187      instructions                     #    2.15  insn per cycle         
-       1.971550633 seconds time elapsed
+     5,565,720,220      cycles                           #    2.749 GHz                    
+    12,015,098,672      instructions                     #    2.16  insn per cycle         
+       2.025137451 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.687020e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.882322e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.882322e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.505249e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.686404e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.686404e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.948998 sec
+TOTAL       :     3.097156 sec
 INFO: No Floating Point Exceptions have been reported
-     5,710,948,756      cycles                           #    1.934 GHz                    
-     8,289,147,048      instructions                     #    1.45  insn per cycle         
-       2.954636423 seconds time elapsed
+     5,693,059,407      cycles                           #    1.835 GHz                    
+     8,290,060,020      instructions                     #    1.46  insn per cycle         
+       3.102640874 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt
index dd8639d462..8c4732a972 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 49s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_19:51:07
+DATE: 2024-08-29_22:48:47
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.953365e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.169057e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.275879e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.725379e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.168865e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.278488e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.516826 sec
+TOTAL       :     0.526687 sec
 INFO: No Floating Point Exceptions have been reported
-     2,205,203,774      cycles                           #    2.951 GHz                    
-     3,179,876,331      instructions                     #    1.44  insn per cycle         
-       0.803907668 seconds time elapsed
+     2,160,622,776      cycles                           #    2.845 GHz                    
+     3,135,821,431      instructions                     #    1.45  insn per cycle         
+       0.815922370 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.926342e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.977633e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.977633e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.868877e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.918216e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.918216e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.581069 sec
+TOTAL       :     5.716529 sec
 INFO: No Floating Point Exceptions have been reported
-    16,849,073,106      cycles                           #    3.014 GHz                    
-    45,045,731,432      instructions                     #    2.67  insn per cycle         
-       5.590685845 seconds time elapsed
+    16,714,662,218      cycles                           #    2.922 GHz                    
+    44,929,451,831      instructions                     #    2.69  insn per cycle         
+       5.721855786 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  568) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.423058e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.602908e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.602908e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.323426e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.496662e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.496662e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.201422 sec
+TOTAL       :     3.263820 sec
 INFO: No Floating Point Exceptions have been reported
-     9,674,035,774      cycles                           #    3.013 GHz                    
-    26,815,165,030      instructions                     #    2.77  insn per cycle         
-       3.211231348 seconds time elapsed
+     9,527,140,435      cycles                           #    2.915 GHz                    
+    26,694,078,018      instructions                     #    2.80  insn per cycle         
+       3.269292343 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2331) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.649217e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.990962e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.990962e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.562467e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.880552e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.880552e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.396466 sec
+TOTAL       :     2.405499 sec
 INFO: No Floating Point Exceptions have been reported
-     6,732,899,102      cycles                           #    2.799 GHz                    
-    14,237,973,279      instructions                     #    2.11  insn per cycle         
-       2.406196706 seconds time elapsed
+     6,601,230,669      cycles                           #    2.739 GHz                    
+    14,116,197,972      instructions                     #    2.14  insn per cycle         
+       2.410986894 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2703) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.923382e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.291610e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.291610e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.773473e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.112833e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.112833e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.269821 sec
+TOTAL       :     2.302668 sec
 INFO: No Floating Point Exceptions have been reported
-     6,473,185,925      cycles                           #    2.841 GHz                    
-    13,823,290,533      instructions                     #    2.14  insn per cycle         
-       2.279550700 seconds time elapsed
+     6,319,983,051      cycles                           #    2.739 GHz                    
+    13,699,915,851      instructions                     #    2.17  insn per cycle         
+       2.308121633 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2349) (512y:  297) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.570682e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.758312e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.758312e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.364540e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.530799e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.530799e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.077054 sec
+TOTAL       :     3.222914 sec
 INFO: No Floating Point Exceptions have been reported
-     6,015,923,061      cycles                           #    1.950 GHz                    
-    10,176,638,000      instructions                     #    1.69  insn per cycle         
-       3.086647254 seconds time elapsed
+     5,894,748,475      cycles                           #    1.827 GHz                    
+    10,058,448,006      instructions                     #    1.71  insn per cycle         
+       3.228425871 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1261) (512y:  208) (512z: 1987)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt
index 0bede2793b..d808eb7b63 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt
@@ -1,6 +1,6 @@
 
 ------------------------------------------------
-Preliminary build completed in 0d 00h 00m 18s
+Preliminary build completed in 0d 00h 00m 31s
 ------------------------------------------------
 
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -44,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-28_14:28:38
+DATE: 2024-08-29_23:19:12
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.586606e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.163939e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.280339e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.580953e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.164243e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.279779e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.525735 sec
+TOTAL       :     0.528023 sec
 INFO: No Floating Point Exceptions have been reported
-     2,232,365,149      cycles                           #    2.890 GHz                    
-     3,182,956,973      instructions                     #    1.43  insn per cycle         
-       0.829750392 seconds time elapsed
+     2,213,384,103      cycles                           #    2.873 GHz                    
+     3,159,522,648      instructions                     #    1.43  insn per cycle         
+       0.827781723 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -86,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.432883e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.516480e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.516480e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.417645e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.500903e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.500903e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     4.416414 sec
+TOTAL       :     4.443506 sec
 INFO: No Floating Point Exceptions have been reported
-    12,990,944,477      cycles                           #    2.938 GHz                    
-    34,329,027,064      instructions                     #    2.64  insn per cycle         
-       4.422282858 seconds time elapsed
+    12,992,900,472      cycles                           #    2.921 GHz                    
+    34,329,063,073      instructions                     #    2.64  insn per cycle         
+       4.449379965 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  665) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe
@@ -115,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.972324e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.108975e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.108975e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.973650e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.110657e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.110657e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.636495 sec
+TOTAL       :     3.634011 sec
 INFO: No Floating Point Exceptions have been reported
-    10,687,597,744      cycles                           #    2.935 GHz                    
-    24,000,551,707      instructions                     #    2.25  insn per cycle         
-       3.642117513 seconds time elapsed
+    10,678,102,516      cycles                           #    2.934 GHz                    
+    23,998,547,916      instructions                     #    2.25  insn per cycle         
+       3.639975393 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2571) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe
@@ -144,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.628578e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.954501e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.954501e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.579404e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.901013e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.901013e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.372776 sec
+TOTAL       :     2.398468 sec
 INFO: No Floating Point Exceptions have been reported
-     6,572,155,340      cycles                           #    2.764 GHz                    
-    12,342,988,553      instructions                     #    1.88  insn per cycle         
-       2.378340216 seconds time elapsed
+     6,583,807,996      cycles                           #    2.739 GHz                    
+    12,341,554,114      instructions                     #    1.87  insn per cycle         
+       2.404741988 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3096) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe
@@ -173,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.926591e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.300605e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.300605e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.932760e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.298403e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.298403e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.235661 sec
+TOTAL       :     2.233613 sec
 INFO: No Floating Point Exceptions have been reported
-     6,180,900,661      cycles                           #    2.759 GHz                    
-    11,564,434,089      instructions                     #    1.87  insn per cycle         
-       2.241440916 seconds time elapsed
+     6,166,393,584      cycles                           #    2.754 GHz                    
+    11,564,413,084      instructions                     #    1.88  insn per cycle         
+       2.239534572 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2640) (512y:  239) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe
@@ -202,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.738770e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.946470e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.946470e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.724843e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.929225e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.929225e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.911722 sec
+TOTAL       :     2.922222 sec
 INFO: No Floating Point Exceptions have been reported
-     5,383,828,846      cycles                           #    1.846 GHz                    
-     9,280,485,426      instructions                     #    1.72  insn per cycle         
-       2.917468081 seconds time elapsed
+     5,395,079,092      cycles                           #    1.844 GHz                    
+     9,281,282,823      instructions                     #    1.72  insn per cycle         
+       2.928247598 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2084) (512y:  282) (512z: 1954)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt
index 65dd600686..5ff2869e2d 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 27s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:09:18
+DATE: 2024-08-29_23:19:36
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.067308e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.179547e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.276758e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.561341e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.161596e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.277122e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.523442 sec
+TOTAL       :     0.531876 sec
 INFO: No Floating Point Exceptions have been reported
-     2,203,163,418      cycles                           #    2.923 GHz                    
-     3,173,114,436      instructions                     #    1.44  insn per cycle         
-       0.812619708 seconds time elapsed
+     2,190,984,202      cycles                           #    2.865 GHz                    
+     3,162,138,864      instructions                     #    1.44  insn per cycle         
+       0.823306138 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.597347e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.694908e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.694908e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.555546e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.648822e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.648822e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     4.173436 sec
+TOTAL       :     4.209735 sec
 INFO: No Floating Point Exceptions have been reported
-    12,532,788,513      cycles                           #    2.997 GHz                    
-    35,033,869,738      instructions                     #    2.80  insn per cycle         
-       4.183331959 seconds time elapsed
+    12,311,073,638      cycles                           #    2.921 GHz                    
+    34,900,214,419      instructions                     #    2.83  insn per cycle         
+       4.215528854 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  430) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.046469e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.187931e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.187931e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.946352e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.082788e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.082788e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.579716 sec
+TOTAL       :     3.666951 sec
 INFO: No Floating Point Exceptions have been reported
-    10,790,492,364      cycles                           #    3.007 GHz                    
-    23,124,229,685      instructions                     #    2.14  insn per cycle         
-       3.589416563 seconds time elapsed
+    10,672,048,366      cycles                           #    2.906 GHz                    
+    23,002,190,728      instructions                     #    2.16  insn per cycle         
+       3.672904066 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2339) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.059739e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.450926e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.450926e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.911282e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.277398e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.277398e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.211695 sec
+TOTAL       :     2.242288 sec
 INFO: No Floating Point Exceptions have been reported
-     6,295,892,975      cycles                           #    2.836 GHz                    
-    12,072,618,893      instructions                     #    1.92  insn per cycle         
-       2.220989978 seconds time elapsed
+     6,169,794,712      cycles                           #    2.745 GHz                    
+    11,950,055,335      instructions                     #    1.94  insn per cycle         
+       2.248190170 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2484) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.997474e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.374849e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.374849e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.019160e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.398604e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.398604e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.235122 sec
+TOTAL       :     2.196254 sec
 INFO: No Floating Point Exceptions have been reported
-     6,279,000,139      cycles                           #    2.798 GHz                    
-    11,243,252,484      instructions                     #    1.79  insn per cycle         
-       2.244690704 seconds time elapsed
+     6,049,955,238      cycles                           #    2.748 GHz                    
+    11,121,316,368      instructions                     #    1.84  insn per cycle         
+       2.202221657 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2095) (512y:  174) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.095312e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.342354e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.342354e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.772228e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.982193e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.982193e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.697672 sec
+TOTAL       :     2.886677 sec
 INFO: No Floating Point Exceptions have been reported
-     5,310,077,423      cycles                           #    1.962 GHz                    
-     9,140,837,043      instructions                     #    1.72  insn per cycle         
-       2.707468994 seconds time elapsed
+     5,316,806,565      cycles                           #    1.839 GHz                    
+     9,020,560,556      instructions                     #    1.70  insn per cycle         
+       2.892692780 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1620) (512y:  208) (512z: 1570)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inlL_hrd0.txt
index 0b16978fc1..75425bce3c 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inlL_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inlL_hrd0.txt
@@ -1,6 +1,6 @@
 
 ------------------------------------------------
-Preliminary build completed in 0d 00h 00m 16s
+Preliminary build completed in 0d 00h 00m 27s
 ------------------------------------------------
 
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -44,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-28_14:29:02
+DATE: 2024-08-30_00:40:30
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.667135e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.116115e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.251573e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.697972e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.122560e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.251638e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.550450 sec
+TOTAL       :     0.550186 sec
 INFO: No Floating Point Exceptions have been reported
-     2,272,219,097      cycles                           #    2.889 GHz                    
-     3,361,475,195      instructions                     #    1.48  insn per cycle         
-       0.842685843 seconds time elapsed
+     2,254,519,804      cycles                           #    2.871 GHz                    
+     3,330,742,104      instructions                     #    1.48  insn per cycle         
+       0.841959847 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inlL_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 190
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -86,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.832998e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.879765e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.879765e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.818720e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.866638e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.866638e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.828276 sec
+TOTAL       :     5.872619 sec
 INFO: No Floating Point Exceptions have been reported
-    17,179,972,160      cycles                           #    2.946 GHz                    
-    46,155,122,198      instructions                     #    2.69  insn per cycle         
-       5.834030419 seconds time elapsed
+    17,222,612,558      cycles                           #    2.930 GHz                    
+    46,158,403,650      instructions                     #    2.68  insn per cycle         
+       5.878386529 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  273) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inlL_hrd0/runTest_cpp.exe
@@ -115,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.183851e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.339899e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.339899e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.180082e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.336925e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.336925e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.400678 sec
+TOTAL       :     3.404238 sec
 INFO: No Floating Point Exceptions have been reported
-    10,026,337,742      cycles                           #    2.944 GHz                    
-    28,036,704,962      instructions                     #    2.80  insn per cycle         
-       3.406380757 seconds time elapsed
+     9,980,416,956      cycles                           #    2.928 GHz                    
+    28,036,691,489      instructions                     #    2.81  insn per cycle         
+       3.410012374 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  874) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inlL_hrd0/runTest_cpp.exe
@@ -144,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.970649e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.360066e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.360066e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.964606e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.337711e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.337711e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.218211 sec
+TOTAL       :     2.218308 sec
 INFO: No Floating Point Exceptions have been reported
-     6,090,497,709      cycles                           #    2.740 GHz                    
-    12,538,833,027      instructions                     #    2.06  insn per cycle         
-       2.223935212 seconds time elapsed
+     6,091,712,086      cycles                           #    2.741 GHz                    
+    12,540,485,968      instructions                     #    2.06  insn per cycle         
+       2.223801766 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1544) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inlL_hrd0/runTest_cpp.exe
@@ -173,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.364093e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.809257e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.809257e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.437127e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.884215e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.884215e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.061967 sec
+TOTAL       :     2.034337 sec
 INFO: No Floating Point Exceptions have been reported
-     5,638,892,941      cycles                           #    2.729 GHz                    
-    12,020,058,201      instructions                     #    2.13  insn per cycle         
-       2.067543648 seconds time elapsed
+     5,590,116,287      cycles                           #    2.742 GHz                    
+    12,020,168,002      instructions                     #    2.15  insn per cycle         
+       2.039842201 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1317) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inlL_hrd0/runTest_cpp.exe
@@ -202,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.493455e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.675955e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.675955e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.467933e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.644297e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.644297e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.109485 sec
+TOTAL       :     3.130618 sec
 INFO: No Floating Point Exceptions have been reported
-     5,760,815,236      cycles                           #    1.850 GHz                    
-     8,461,440,763      instructions                     #    1.47  insn per cycle         
-       3.115293443 seconds time elapsed
+     5,757,949,769      cycles                           #    1.836 GHz                    
+     8,462,126,118      instructions                     #    1.47  insn per cycle         
+       3.136400818 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1414) (512y:  122) (512z:  860)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inlL_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
index 38766f6059..f0c0458a56 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 54s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_19:51:32
+DATE: 2024-08-29_22:49:12
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.614637e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.196490e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.391083e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.071799e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.189965e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.392859e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
-TOTAL       :     0.477185 sec
+TOTAL       :     0.480577 sec
 INFO: No Floating Point Exceptions have been reported
-     2,083,240,592      cycles                           #    2.927 GHz                    
-     2,954,253,066      instructions                     #    1.42  insn per cycle         
-       0.768394565 seconds time elapsed
+     2,038,597,081      cycles                           #    2.857 GHz                    
+     2,938,312,083      instructions                     #    1.44  insn per cycle         
+       0.770278973 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.972261e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.028190e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.028190e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.915621e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.969220e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.969220e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     5.413447 sec
+TOTAL       :     5.557589 sec
 INFO: No Floating Point Exceptions have been reported
-    16,298,510,952      cycles                           #    3.008 GHz                    
-    45,383,093,310      instructions                     #    2.78  insn per cycle         
-       5.420499578 seconds time elapsed
+    16,233,982,752      cycles                           #    2.919 GHz                    
+    45,332,344,917      instructions                     #    2.79  insn per cycle         
+       5.562666162 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  592) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.516274e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.853993e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.853993e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.521998e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.857013e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.857013e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.420950 sec
+TOTAL       :     2.403382 sec
 INFO: No Floating Point Exceptions have been reported
-     7,111,183,634      cycles                           #    2.930 GHz                    
-    17,819,948,567      instructions                     #    2.51  insn per cycle         
-       2.427658659 seconds time elapsed
+     7,048,724,233      cycles                           #    2.928 GHz                    
+    17,767,905,011      instructions                     #    2.52  insn per cycle         
+       2.408503086 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.607320e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.824778e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.824778e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.219776e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.310619e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.310619e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.317016 sec
+TOTAL       :     1.360374 sec
 INFO: No Floating Point Exceptions have been reported
-     3,802,543,905      cycles                           #    2.874 GHz                    
-     8,308,913,768      instructions                     #    2.19  insn per cycle         
-       1.323729586 seconds time elapsed
+     3,741,111,926      cycles                           #    2.741 GHz                    
+     8,257,978,042      instructions                     #    2.21  insn per cycle         
+       1.365656837 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3350) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.087676e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.047463e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.047463e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.800434e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.005315e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.005315e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.251137 sec
+TOTAL       :     1.275551 sec
 INFO: No Floating Point Exceptions have been reported
-     3,608,199,910      cycles                           #    2.871 GHz                    
-     7,963,896,839      instructions                     #    2.21  insn per cycle         
-       1.257792419 seconds time elapsed
+     3,534,104,068      cycles                           #    2.761 GHz                    
+     7,913,123,613      instructions                     #    2.24  insn per cycle         
+       1.280701746 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3196) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.851468e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.561768e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.561768e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.455759e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.102326e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.102326e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.629029 sec
+TOTAL       :     1.709162 sec
 INFO: No Floating Point Exceptions have been reported
-     3,306,960,550      cycles                           #    2.023 GHz                    
-     6,143,321,587      instructions                     #    1.86  insn per cycle         
-       1.635836688 seconds time elapsed
+     3,253,158,863      cycles                           #    1.899 GHz                    
+     6,093,971,876      instructions                     #    1.87  insn per cycle         
+       1.714418078 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2287) (512y:   24) (512z: 2153)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt
index 87c93d2ebd..8326a87b8d 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 04s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:18:02
+DATE: 2024-08-29_23:28:36
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.181597e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.725510e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.725510e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.951895e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.416585e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.416585e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086805e+00 +- 3.414078e-03 )  GeV^0
-TOTAL       :     0.672294 sec
+TOTAL       :     0.676117 sec
 INFO: No Floating Point Exceptions have been reported
-     2,617,099,456      cycles                           #    2.904 GHz                    
-     4,062,920,786      instructions                     #    1.55  insn per cycle         
-       0.957784001 seconds time elapsed
+     2,630,316,460      cycles                           #    2.880 GHz                    
+     4,097,364,848      instructions                     #    1.56  insn per cycle         
+       0.971302668 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -91,15 +95,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.956957e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.011198e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.011198e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.896591e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.949047e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.949047e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     5.484325 sec
+TOTAL       :     5.656810 sec
 INFO: No Floating Point Exceptions have been reported
-    16,490,289,692      cycles                           #    3.004 GHz                    
-    45,381,699,221      instructions                     #    2.75  insn per cycle         
-       5.490323533 seconds time elapsed
+    16,419,925,580      cycles                           #    2.900 GHz                    
+    45,375,599,039      instructions                     #    2.76  insn per cycle         
+       5.663172482 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  592) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -121,15 +125,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.582859e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.920444e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.920444e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.458760e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.790118e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.790118e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.418229 sec
+TOTAL       :     2.482654 sec
 INFO: No Floating Point Exceptions have been reported
-     7,267,277,115      cycles                           #    2.998 GHz                    
-    18,050,295,436      instructions                     #    2.48  insn per cycle         
-       2.424701000 seconds time elapsed
+     7,237,921,119      cycles                           #    2.909 GHz                    
+    18,048,078,725      instructions                     #    2.49  insn per cycle         
+       2.488857904 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -151,15 +155,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.393268e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.547596e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.547596e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.197371e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.278269e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.278269e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.379855 sec
+TOTAL       :     1.409020 sec
 INFO: No Floating Point Exceptions have been reported
-     3,938,588,665      cycles                           #    2.843 GHz                    
-     8,495,556,645      instructions                     #    2.16  insn per cycle         
-       1.386260790 seconds time elapsed
+     3,922,879,812      cycles                           #    2.773 GHz                    
+     8,495,549,912      instructions                     #    2.17  insn per cycle         
+       1.415323468 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3350) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -181,15 +185,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.873570e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.014552e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.014552e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.667680e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.889347e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.889347e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.313964 sec
+TOTAL       :     1.341925 sec
 INFO: No Floating Point Exceptions have been reported
-     3,770,505,615      cycles                           #    2.857 GHz                    
-     8,157,653,367      instructions                     #    2.16  insn per cycle         
-       1.320625840 seconds time elapsed
+     3,729,600,082      cycles                           #    2.768 GHz                    
+     8,150,649,069      instructions                     #    2.19  insn per cycle         
+       1.348181911 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3196) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -211,15 +215,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.668614e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.340392e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.340392e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.360708e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.989495e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.989495e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.706792 sec
+TOTAL       :     1.780482 sec
 INFO: No Floating Point Exceptions have been reported
-     3,475,092,320      cycles                           #    2.029 GHz                    
-     6,350,458,775      instructions                     #    1.83  insn per cycle         
-       1.713327675 seconds time elapsed
+     3,455,332,957      cycles                           #    1.935 GHz                    
+     6,348,114,195      instructions                     #    1.84  insn per cycle         
+       1.786742043 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2287) (512y:   24) (512z: 2153)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt
index a8425bb782..71fa71b4dc 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 00s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:29:26
+DATE: 2024-08-29_23:40:12
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.044161e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.197356e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.390140e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.068568e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.187560e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.391526e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079446e+00 +- 3.403306e-03 )  GeV^0
-TOTAL       :     0.573091 sec
+TOTAL       :     0.573943 sec
 INFO: No Floating Point Exceptions have been reported
-     2,302,500,947      cycles                           #    2.899 GHz                    
-     3,359,714,134      instructions                     #    1.46  insn per cycle         
-       0.851330175 seconds time elapsed
+     2,277,708,206      cycles                           #    2.874 GHz                    
+     3,358,053,416      instructions                     #    1.47  insn per cycle         
+       0.850275167 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.971169e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.027848e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.027848e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.917195e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.972239e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.972239e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079573e+00 +- 3.404712e-03 )  GeV^0
-TOTAL       :     5.460102 sec
+TOTAL       :     5.612722 sec
 INFO: No Floating Point Exceptions have been reported
-    16,412,251,635      cycles                           #    3.004 GHz                    
-    45,363,438,738      instructions                     #    2.76  insn per cycle         
-       5.465223733 seconds time elapsed
+    16,407,033,244      cycles                           #    2.921 GHz                    
+    45,362,251,036      instructions                     #    2.76  insn per cycle         
+       5.617968376 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  592) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.639399e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.984668e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.984668e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.525760e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.861516e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.861516e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079572e+00 +- 3.404712e-03 )  GeV^0
-TOTAL       :     2.397788 sec
+TOTAL       :     2.459117 sec
 INFO: No Floating Point Exceptions have been reported
-     7,225,778,706      cycles                           #    3.008 GHz                    
-    17,780,590,298      instructions                     #    2.46  insn per cycle         
-       2.402807836 seconds time elapsed
+     7,212,908,479      cycles                           #    2.929 GHz                    
+    17,780,160,206      instructions                     #    2.47  insn per cycle         
+       2.464386875 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.542458e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.724935e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.724935e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.204922e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.294906e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.294906e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079550e+00 +- 3.404207e-03 )  GeV^0
-TOTAL       :     1.365171 sec
+TOTAL       :     1.419201 sec
 INFO: No Floating Point Exceptions have been reported
-     3,905,630,598      cycles                           #    2.852 GHz                    
-     8,242,044,959      instructions                     #    2.11  insn per cycle         
-       1.370327142 seconds time elapsed
+     3,902,872,992      cycles                           #    2.742 GHz                    
+     8,243,358,841      instructions                     #    2.11  insn per cycle         
+       1.424422244 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3350) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.995768e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.031926e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.031926e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.783463e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.004663e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.004663e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079550e+00 +- 3.404207e-03 )  GeV^0
-TOTAL       :     1.306099 sec
+TOTAL       :     1.336432 sec
 INFO: No Floating Point Exceptions have been reported
-     3,721,703,946      cycles                           #    2.840 GHz                    
-     7,863,594,201      instructions                     #    2.11  insn per cycle         
-       1.311330370 seconds time elapsed
+     3,705,633,949      cycles                           #    2.764 GHz                    
+     7,864,024,156      instructions                     #    2.12  insn per cycle         
+       1.341589225 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3196) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.758543e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.446976e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.446976e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.472436e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.118654e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.118654e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079550e+00 +- 3.404208e-03 )  GeV^0
-TOTAL       :     1.692116 sec
+TOTAL       :     1.763167 sec
 INFO: No Floating Point Exceptions have been reported
-     3,425,904,021      cycles                           #    2.019 GHz                    
-     6,042,797,691      instructions                     #    1.76  insn per cycle         
-       1.697363173 seconds time elapsed
+     3,422,334,381      cycles                           #    1.936 GHz                    
+     6,043,294,482      instructions                     #    1.77  insn per cycle         
+       1.768435505 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2287) (512y:   24) (512z: 2153)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt
index a9cab1763c..0d8155a7b6 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 01s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:26:38
+DATE: 2024-08-29_23:37:22
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.225239e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.197913e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.389129e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.074442e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.188782e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.393838e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
-TOTAL       :     0.517448 sec
+TOTAL       :     0.517208 sec
 INFO: No Floating Point Exceptions have been reported
-     2,112,624,842      cycles                           #    2.859 GHz                    
-     3,317,853,292      instructions                     #    1.57  insn per cycle         
-       0.795716447 seconds time elapsed
+     2,103,227,271      cycles                           #    2.862 GHz                    
+     3,266,134,218      instructions                     #    1.55  insn per cycle         
+       0.793508043 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.922136e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.976186e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.976186e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.912478e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.965888e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.965888e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     5.540195 sec
+TOTAL       :     5.567336 sec
 INFO: No Floating Point Exceptions have been reported
-    16,275,080,243      cycles                           #    2.936 GHz                    
-    45,337,789,928      instructions                     #    2.79  insn per cycle         
-       5.545390256 seconds time elapsed
+    16,244,625,996      cycles                           #    2.916 GHz                    
+    45,334,027,591      instructions                     #    2.79  insn per cycle         
+       5.572437548 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  592) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.488675e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.824628e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.824628e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.519328e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.854934e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.854934e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.422437 sec
+TOTAL       :     2.405432 sec
 INFO: No Floating Point Exceptions have been reported
-     7,052,758,354      cycles                           #    2.906 GHz                    
-    17,767,509,302      instructions                     #    2.52  insn per cycle         
-       2.427864435 seconds time elapsed
+     7,053,566,121      cycles                           #    2.927 GHz                    
+    17,767,653,624      instructions                     #    2.52  insn per cycle         
+       2.410646458 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.294778e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.430722e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.430722e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.279648e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.384307e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.384307e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.350098 sec
+TOTAL       :     1.350250 sec
 INFO: No Floating Point Exceptions have been reported
-     3,737,878,511      cycles                           #    2.759 GHz                    
-     8,257,495,819      instructions                     #    2.21  insn per cycle         
-       1.355605620 seconds time elapsed
+     3,726,374,980      cycles                           #    2.751 GHz                    
+     8,258,716,755      instructions                     #    2.22  insn per cycle         
+       1.355419801 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3350) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.700373e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.969590e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.969590e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.799998e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.006589e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.006589e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.290488 sec
+TOTAL       :     1.276520 sec
 INFO: No Floating Point Exceptions have been reported
-     3,556,397,958      cycles                           #    2.746 GHz                    
-     7,911,980,107      instructions                     #    2.22  insn per cycle         
-       1.296127398 seconds time elapsed
+     3,537,758,951      cycles                           #    2.762 GHz                    
+     7,913,131,472      instructions                     #    2.24  insn per cycle         
+       1.281778078 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3196) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.356565e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.990428e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.990428e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.474459e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.121773e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.121773e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.736165 sec
+TOTAL       :     1.704531 sec
 INFO: No Floating Point Exceptions have been reported
-     3,256,937,975      cycles                           #    1.871 GHz                    
-     6,093,354,447      instructions                     #    1.87  insn per cycle         
-       1.741565922 seconds time elapsed
+     3,253,759,575      cycles                           #    1.904 GHz                    
+     6,092,424,314      instructions                     #    1.87  insn per cycle         
+       1.709851210 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2287) (512y:   24) (512z: 2153)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt
index 1b7d56c0f4..f704621ca8 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 00s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:23:55
+DATE: 2024-08-29_23:34:36
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -50,15 +54,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.925974e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.195417e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.383637e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.623564e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.182304e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.386853e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086805e+00 +- 3.414078e-03 )  GeV^0
-TOTAL       :     0.617651 sec
+TOTAL       :     0.621738 sec
 INFO: No Floating Point Exceptions have been reported
-     2,472,700,101      cycles                           #    2.956 GHz                    
-     3,844,270,088      instructions                     #    1.55  insn per cycle         
-       0.895131936 seconds time elapsed
+     2,430,258,613      cycles                           #    2.875 GHz                    
+     3,766,141,245      instructions                     #    1.55  insn per cycle         
+       0.902091417 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
@@ -84,15 +88,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.959227e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.014297e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.014297e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.919517e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.974180e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.974180e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     5.435139 sec
+TOTAL       :     5.547153 sec
 INFO: No Floating Point Exceptions have been reported
-    16,264,887,736      cycles                           #    2.990 GHz                    
-    45,334,381,661      instructions                     #    2.79  insn per cycle         
-       5.440210307 seconds time elapsed
+    16,239,383,395      cycles                           #    2.925 GHz                    
+    45,332,563,163      instructions                     #    2.79  insn per cycle         
+       5.552300455 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  592) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -113,15 +117,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.519066e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.848466e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.848466e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.514723e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.851864e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.851864e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.405682 sec
+TOTAL       :     2.407844 sec
 INFO: No Floating Point Exceptions have been reported
-     7,056,903,182      cycles                           #    2.928 GHz                    
-    17,767,514,446      instructions                     #    2.52  insn per cycle         
-       2.410973137 seconds time elapsed
+     7,048,396,058      cycles                           #    2.922 GHz                    
+    17,769,003,065      instructions                     #    2.52  insn per cycle         
+       2.412983960 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -142,15 +146,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.565756e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.749553e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.749553e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.312587e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.437297e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.437297e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.305436 sec
+TOTAL       :     1.346063 sec
 INFO: No Floating Point Exceptions have been reported
-     3,753,143,327      cycles                           #    2.865 GHz                    
-     8,257,983,801      instructions                     #    2.20  insn per cycle         
-       1.310628316 seconds time elapsed
+     3,728,179,537      cycles                           #    2.761 GHz                    
+     8,258,013,414      instructions                     #    2.22  insn per cycle         
+       1.351324909 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3350) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -171,15 +175,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.040312e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.036836e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.036836e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.782227e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.003815e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.003815e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.242569 sec
+TOTAL       :     1.278814 sec
 INFO: No Floating Point Exceptions have been reported
-     3,552,004,540      cycles                           #    2.848 GHz                    
-     7,912,724,917      instructions                     #    2.23  insn per cycle         
-       1.247741947 seconds time elapsed
+     3,538,647,518      cycles                           #    2.758 GHz                    
+     7,913,188,307      instructions                     #    2.24  insn per cycle         
+       1.283975197 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3196) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.813901e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.506813e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.506813e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.475607e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.120414e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.120414e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.621227 sec
+TOTAL       :     1.703723 sec
 INFO: No Floating Point Exceptions have been reported
-     3,253,421,004      cycles                           #    2.002 GHz                    
-     6,092,602,588      instructions                     #    1.87  insn per cycle         
-       1.626390565 seconds time elapsed
+     3,247,851,973      cycles                           #    1.901 GHz                    
+     6,092,721,375      instructions                     #    1.88  insn per cycle         
+       1.708887964 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2287) (512y:   24) (512z: 2153)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt
index 613986d3ca..6b132c1b1f 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 50s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_19:51:52
+DATE: 2024-08-29_22:49:32
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.011234e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.481106e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.718662e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.471980e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.448283e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.703783e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
-TOTAL       :     0.482144 sec
+TOTAL       :     0.483699 sec
 INFO: No Floating Point Exceptions have been reported
-     2,069,508,701      cycles                           #    2.943 GHz                    
-     2,973,558,730      instructions                     #    1.44  insn per cycle         
-       0.762169669 seconds time elapsed
+     2,013,562,134      cycles                           #    2.853 GHz                    
+     2,909,226,103      instructions                     #    1.44  insn per cycle         
+       0.764230645 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.000971e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.057776e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.057776e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.954648e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.010377e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.010377e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     5.337569 sec
+TOTAL       :     5.448737 sec
 INFO: No Floating Point Exceptions have been reported
-    16,045,528,009      cycles                           #    3.003 GHz                    
-    44,492,603,616      instructions                     #    2.77  insn per cycle         
-       5.344572857 seconds time elapsed
+    15,968,697,173      cycles                           #    2.929 GHz                    
+    44,442,467,905      instructions                     #    2.78  insn per cycle         
+       5.453950650 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  537) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.399267e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.870292e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.870292e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.288745e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.754008e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.754008e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.040967 sec
+TOTAL       :     2.067756 sec
 INFO: No Floating Point Exceptions have been reported
-     6,120,195,211      cycles                           #    2.990 GHz                    
-    17,124,524,771      instructions                     #    2.80  insn per cycle         
-       2.047704691 seconds time elapsed
+     6,064,224,873      cycles                           #    2.927 GHz                    
+    17,073,609,650      instructions                     #    2.82  insn per cycle         
+       2.072883041 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2864) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.231646e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.843621e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.843621e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.044707e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.619635e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.619635e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.779814 sec
+TOTAL       :     1.819445 sec
 INFO: No Floating Point Exceptions have been reported
-     5,080,547,059      cycles                           #    2.845 GHz                    
-    10,273,415,072      instructions                     #    2.02  insn per cycle         
-       1.786648263 seconds time elapsed
+     5,009,520,595      cycles                           #    2.747 GHz                    
+    10,222,223,141      instructions                     #    2.04  insn per cycle         
+       1.824644374 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3893) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.292968e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.928983e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.928983e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.086668e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.667060e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.667060e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.763357 sec
+TOTAL       :     1.808098 sec
 INFO: No Floating Point Exceptions have been reported
-     5,036,199,960      cycles                           #    2.847 GHz                    
-    10,043,698,662      instructions                     #    1.99  insn per cycle         
-       1.770080531 seconds time elapsed
+     4,954,174,809      cycles                           #    2.733 GHz                    
+     9,992,697,574      instructions                     #    2.02  insn per cycle         
+       1.813464382 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3794) (512y:    2) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.908901e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.261898e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.261898e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.638616e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.961119e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.961119e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     2.233509 sec
+TOTAL       :     2.344859 sec
 INFO: No Floating Point Exceptions have been reported
-     4,417,373,079      cycles                           #    1.973 GHz                    
-     8,493,082,992      instructions                     #    1.92  insn per cycle         
-       2.240143434 seconds time elapsed
+     4,359,058,163      cycles                           #    1.856 GHz                    
+     8,441,473,825      instructions                     #    1.94  insn per cycle         
+       2.350013338 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2784) (512y:    4) (512z: 2752)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt
index 0ca4814912..67668751a3 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 31s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:09:41
+DATE: 2024-08-29_23:20:00
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.662526e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.213312e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.395769e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.693096e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.181249e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.397047e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
-TOTAL       :     0.479336 sec
+TOTAL       :     0.488701 sec
 INFO: No Floating Point Exceptions have been reported
-     2,068,711,068      cycles                           #    2.929 GHz                    
-     2,952,499,501      instructions                     #    1.43  insn per cycle         
-       0.763196119 seconds time elapsed
+     2,026,366,666      cycles                           #    2.849 GHz                    
+     2,929,563,128      instructions                     #    1.45  insn per cycle         
+       0.770424636 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.557673e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.652343e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.652343e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.491467e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.583014e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.583014e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     4.192940 sec
+TOTAL       :     4.294115 sec
 INFO: No Floating Point Exceptions have been reported
-    12,602,357,038      cycles                           #    3.002 GHz                    
-    34,631,326,432      instructions                     #    2.75  insn per cycle         
-       4.199620510 seconds time elapsed
+    12,564,892,138      cycles                           #    2.923 GHz                    
+    34,594,410,065      instructions                     #    2.75  insn per cycle         
+       4.299702876 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  683) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.457087e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.945109e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.945109e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.316170e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.785295e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.785295e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.017495 sec
+TOTAL       :     2.059154 sec
 INFO: No Floating Point Exceptions have been reported
-     6,096,552,375      cycles                           #    3.013 GHz                    
-    14,886,527,681      instructions                     #    2.44  insn per cycle         
-       2.024226195 seconds time elapsed
+     6,055,050,463      cycles                           #    2.934 GHz                    
+    14,843,315,905      instructions                     #    2.45  insn per cycle         
+       2.064883066 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2980) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.320703e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.178361e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.178361e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.125837e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.942815e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.942815e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.525431 sec
+TOTAL       :     1.556816 sec
 INFO: No Floating Point Exceptions have been reported
-     4,362,864,395      cycles                           #    2.849 GHz                    
-     9,093,170,699      instructions                     #    2.08  insn per cycle         
-       1.532091223 seconds time elapsed
+     4,301,919,832      cycles                           #    2.755 GHz                    
+     9,049,103,047      instructions                     #    2.10  insn per cycle         
+       1.562522898 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4446) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.442008e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.347351e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.347351e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.332880e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.184727e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.184727e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.505548 sec
+TOTAL       :     1.516476 sec
 INFO: No Floating Point Exceptions have been reported
-     4,283,778,078      cycles                           #    2.834 GHz                    
-     8,707,570,636      instructions                     #    2.03  insn per cycle         
-       1.512346731 seconds time elapsed
+     4,186,126,979      cycles                           #    2.752 GHz                    
+     8,657,136,628      instructions                     #    2.07  insn per cycle         
+       1.522018326 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4213) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.480199e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.987074e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.987074e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.348540e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.787233e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.787233e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     2.010348 sec
+TOTAL       :     2.045299 sec
 INFO: No Floating Point Exceptions have been reported
-     3,921,508,341      cycles                           #    1.945 GHz                    
-     7,849,973,775      instructions                     #    2.00  insn per cycle         
-       2.017051814 seconds time elapsed
+     3,828,361,693      cycles                           #    1.868 GHz                    
+     7,802,705,560      instructions                     #    2.04  insn per cycle         
+       2.050733542 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4252) (512y:    0) (512z: 2556)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt
index c66a4f9500..25cb308f89 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 30s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:10:00
+DATE: 2024-08-29_23:20:20
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.014498e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.491996e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.727921e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.117401e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.441872e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.719277e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
-TOTAL       :     0.481358 sec
+TOTAL       :     0.484318 sec
 INFO: No Floating Point Exceptions have been reported
-     2,037,978,515      cycles                           #    2.886 GHz                    
-     2,961,010,767      instructions                     #    1.45  insn per cycle         
-       0.762837811 seconds time elapsed
+     2,027,044,969      cycles                           #    2.865 GHz                    
+     2,915,708,806      instructions                     #    1.44  insn per cycle         
+       0.765596160 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.697323e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.802206e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.802206e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.603125e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.703630e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.703630e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     3.980371 sec
+TOTAL       :     4.113547 sec
 INFO: No Floating Point Exceptions have been reported
-    11,889,490,017      cycles                           #    2.983 GHz                    
-    35,106,748,392      instructions                     #    2.95  insn per cycle         
-       3.987184887 seconds time elapsed
+    11,856,983,433      cycles                           #    2.879 GHz                    
+    35,064,975,773      instructions                     #    2.96  insn per cycle         
+       4.119151430 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  453) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.502653e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.994079e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.994079e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.399477e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.883757e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.883757e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     1.999831 sec
+TOTAL       :     2.027290 sec
 INFO: No Floating Point Exceptions have been reported
-     5,999,305,364      cycles                           #    2.992 GHz                    
-    14,506,447,484      instructions                     #    2.42  insn per cycle         
-       2.006483206 seconds time elapsed
+     5,947,707,323      cycles                           #    2.927 GHz                    
+    14,463,680,627      instructions                     #    2.43  insn per cycle         
+       2.032751601 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2559) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.608204e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.550220e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.550220e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.403034e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.277469e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.277469e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.473214 sec
+TOTAL       :     1.501309 sec
 INFO: No Floating Point Exceptions have been reported
-     4,213,841,990      cycles                           #    2.849 GHz                    
-     8,921,034,070      instructions                     #    2.12  insn per cycle         
-       1.479975021 seconds time elapsed
+     4,151,474,281      cycles                           #    2.757 GHz                    
+     8,876,254,548      instructions                     #    2.14  insn per cycle         
+       1.506811786 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3556) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.485226e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.400149e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.400149e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.465468e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.351611e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.351611e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.496613 sec
+TOTAL       :     1.489055 sec
 INFO: No Floating Point Exceptions have been reported
-     4,261,968,497      cycles                           #    2.836 GHz                    
-     8,450,409,335      instructions                     #    1.98  insn per cycle         
-       1.503441367 seconds time elapsed
+     4,126,432,660      cycles                           #    2.763 GHz                    
+     8,402,940,741      instructions                     #    2.04  insn per cycle         
+       1.494518525 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3284) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.731827e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.224198e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.224198e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.443520e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.895951e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.895951e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.924845 sec
+TOTAL       :     2.012161 sec
 INFO: No Floating Point Exceptions have been reported
-     3,821,108,888      cycles                           #    1.979 GHz                    
-     7,740,611,821      instructions                     #    2.03  insn per cycle         
-       1.931585644 seconds time elapsed
+     3,781,259,830      cycles                           #    1.876 GHz                    
+     7,694,397,332      instructions                     #    2.03  insn per cycle         
+       2.017683593 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3268) (512y:    0) (512z: 2108)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inlL_hrd0.txt
new file mode 100644
index 0000000000..88e0c0cc23
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inlL_hrd0.txt
@@ -0,0 +1,229 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 26s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+DATE: 2024-08-30_00:40:55
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inlL_hrd0/check_cuda.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 5.192882e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.089264e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.349810e+07                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
+TOTAL       :     0.497973 sec
+INFO: No Floating Point Exceptions have been reported
+     2,096,398,508      cycles                           #    2.859 GHz                    
+     2,994,499,369      instructions                     #    1.43  insn per cycle         
+       0.791737429 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inlL_hrd0/check_cuda.exe -p 2048 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 123
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 2.028811e+00
+Avg ME (F77/GPU)   = 2.0288499360904098
+Relative difference = 1.9191580886348533e-05
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.881362e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.933083e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.933083e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
+TOTAL       :     5.658010 sec
+INFO: No Floating Point Exceptions have been reported
+    16,591,003,422      cycles                           #    2.930 GHz                    
+    46,087,518,387      instructions                     #    2.78  insn per cycle         
+       5.663354619 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  270) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028820e+00
+Avg ME (F77/C++)    = 2.0288198692724109
+Relative difference = 6.443528218283898e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 4.527575e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.866741e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.866741e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
+TOTAL       :     2.401365 sec
+INFO: No Floating Point Exceptions have been reported
+     7,034,262,327      cycles                           #    2.924 GHz                    
+    17,839,078,361      instructions                     #    2.54  insn per cycle         
+       2.406840856 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1424) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028819e+00
+Avg ME (F77/C++)    = 2.0288193075684831
+Relative difference = 1.515997647531052e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 8.244663e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.347556e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.347556e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
+TOTAL       :     1.356627 sec
+INFO: No Floating Point Exceptions have been reported
+     3,737,984,082      cycles                           #    2.747 GHz                    
+     8,242,296,261      instructions                     #    2.21  insn per cycle         
+       1.362038191 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2266) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028818e+00
+Avg ME (F77/C++)    = 2.0288181869545951
+Relative difference = 9.214951531400725e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 8.643185e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.851432e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.851432e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
+TOTAL       :     1.297741 sec
+INFO: No Floating Point Exceptions have been reported
+     3,559,047,423      cycles                           #    2.733 GHz                    
+     7,925,278,059      instructions                     #    2.23  insn per cycle         
+       1.303051062 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2139) (512y:   20) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028818e+00
+Avg ME (F77/C++)    = 2.0288181869545951
+Relative difference = 9.214951531400725e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 6.394990e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.029889e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.029889e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
+TOTAL       :     1.725092 sec
+INFO: No Floating Point Exceptions have been reported
+     3,283,809,445      cycles                           #    1.899 GHz                    
+     6,177,490,524      instructions                     #    1.88  insn per cycle         
+       1.730479793 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2272) (512y:   24) (512z: 1187)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028818e+00
+Avg ME (F77/C++)    = 2.0288183141965419
+Relative difference = 1.5486679540361597e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
index 9e258a42c8..348de144bc 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 55s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_19:52:14
+DATE: 2024-08-29_22:49:54
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.928215e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.172881e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.273641e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.721947e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.164266e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.274422e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.521950 sec
+TOTAL       :     0.527209 sec
 INFO: No Floating Point Exceptions have been reported
-     2,213,686,839      cycles                           #    2.946 GHz                    
-     3,178,577,075      instructions                     #    1.44  insn per cycle         
-       0.810096796 seconds time elapsed
+     2,174,329,993      cycles                           #    2.863 GHz                    
+     3,151,780,179      instructions                     #    1.45  insn per cycle         
+       0.816795416 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.841341e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.888035e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.888035e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.792613e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.837824e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.837824e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.832684 sec
+TOTAL       :     5.955327 sec
 INFO: No Floating Point Exceptions have been reported
-    17,545,887,667      cycles                           #    3.004 GHz                    
-    46,212,560,657      instructions                     #    2.63  insn per cycle         
-       5.842093812 seconds time elapsed
+    17,397,526,002      cycles                           #    2.920 GHz                    
+    46,091,587,032      instructions                     #    2.65  insn per cycle         
+       5.960944985 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.270852e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.438233e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.438233e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.176482e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.332914e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.332914e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.344937 sec
+TOTAL       :     3.408214 sec
 INFO: No Floating Point Exceptions have been reported
-    10,073,495,315      cycles                           #    3.004 GHz                    
-    27,713,045,845      instructions                     #    2.75  insn per cycle         
-       3.354389607 seconds time elapsed
+     9,950,593,950      cycles                           #    2.916 GHz                    
+    27,591,843,158      instructions                     #    2.77  insn per cycle         
+       3.413688048 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.229785e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.644944e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.644944e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.032838e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.417454e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.417454e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.142226 sec
+TOTAL       :     2.190453 sec
 INFO: No Floating Point Exceptions have been reported
-     6,138,817,492      cycles                           #    2.854 GHz                    
-    12,602,197,399      instructions                     #    2.05  insn per cycle         
-       2.151581868 seconds time elapsed
+     6,006,383,686      cycles                           #    2.736 GHz                    
+    12,480,601,360      instructions                     #    2.08  insn per cycle         
+       2.196062875 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2762) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.722165e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.222047e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.222047e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.546431e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.015514e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.015514e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     1.971142 sec
+TOTAL       :     1.997058 sec
 INFO: No Floating Point Exceptions have been reported
-     5,621,798,133      cycles                           #    2.839 GHz                    
-    12,035,423,234      instructions                     #    2.14  insn per cycle         
-       1.980714349 seconds time elapsed
+     5,515,075,827      cycles                           #    2.755 GHz                    
+    11,914,073,909      instructions                     #    2.16  insn per cycle         
+       2.002560475 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2507) (512y:  146) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.784432e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.992571e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.992571e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.568540e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.754917e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.754917e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.909360 sec
+TOTAL       :     3.044039 sec
 INFO: No Floating Point Exceptions have been reported
-     5,725,311,509      cycles                           #    1.962 GHz                    
-     8,228,178,315      instructions                     #    1.44  insn per cycle         
-       2.919447921 seconds time elapsed
+     5,590,219,431      cycles                           #    1.834 GHz                    
+     8,106,058,271      instructions                     #    1.45  insn per cycle         
+       3.049627104 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1646) (512y:  126) (512z: 1862)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt
index 0491e4ed6d..57c5b89892 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 49s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_19:52:38
+DATE: 2024-08-29_22:50:19
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.017343e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.179179e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.286659e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.735697e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.172278e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.284391e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.519682 sec
+TOTAL       :     0.527511 sec
 INFO: No Floating Point Exceptions have been reported
-     2,213,688,235      cycles                           #    2.946 GHz                    
-     3,194,056,853      instructions                     #    1.44  insn per cycle         
-       0.808260316 seconds time elapsed
+     2,171,700,119      cycles                           #    2.860 GHz                    
+     3,131,301,039      instructions                     #    1.44  insn per cycle         
+       0.816870399 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.869136e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.918050e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.918050e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.846511e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.895008e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.895008e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.752898 sec
+TOTAL       :     5.785374 sec
 INFO: No Floating Point Exceptions have been reported
-    17,074,104,828      cycles                           #    2.963 GHz                    
-    45,236,287,915      instructions                     #    2.65  insn per cycle         
-       5.764326274 seconds time elapsed
+    16,941,917,135      cycles                           #    2.926 GHz                    
+    45,116,651,747      instructions                     #    2.66  insn per cycle         
+       5.790862754 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  569) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.441463e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.626872e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.626872e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.350385e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.523191e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.523191e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.185909 sec
+TOTAL       :     3.236469 sec
 INFO: No Floating Point Exceptions have been reported
-     9,649,087,118      cycles                           #    3.020 GHz                    
-    26,365,137,437      instructions                     #    2.73  insn per cycle         
-       3.195361891 seconds time elapsed
+     9,506,236,409      cycles                           #    2.933 GHz                    
+    26,243,746,265      instructions                     #    2.76  insn per cycle         
+       3.242056418 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2385) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.613455e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.935335e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.935335e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.486304e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.788448e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.788448e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.413480 sec
+TOTAL       :     2.442979 sec
 INFO: No Floating Point Exceptions have been reported
-     6,867,786,043      cycles                           #    2.835 GHz                    
-    14,147,220,960      instructions                     #    2.06  insn per cycle         
-       2.423178008 seconds time elapsed
+     6,728,910,073      cycles                           #    2.749 GHz                    
+    14,027,146,476      instructions                     #    2.08  insn per cycle         
+       2.448326705 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2884) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.856156e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.210888e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.210888e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.752703e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.089158e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.089158e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.298392 sec
+TOTAL       :     2.312345 sec
 INFO: No Floating Point Exceptions have been reported
-     6,526,789,768      cycles                           #    2.829 GHz                    
-    13,640,691,375      instructions                     #    2.09  insn per cycle         
-       2.307759550 seconds time elapsed
+     6,377,970,315      cycles                           #    2.752 GHz                    
+    13,518,991,158      instructions                     #    2.12  insn per cycle         
+       2.317849497 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2523) (512y:  302) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.731216e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.937483e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.937483e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.573013e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.761275e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.761275e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.951920 sec
+TOTAL       :     3.040837 sec
 INFO: No Floating Point Exceptions have been reported
-     5,713,181,383      cycles                           #    1.930 GHz                    
-     9,325,302,677      instructions                     #    1.63  insn per cycle         
-       2.961562881 seconds time elapsed
+     5,577,789,241      cycles                           #    1.832 GHz                    
+     9,204,522,980      instructions                     #    1.65  insn per cycle         
+       3.046305567 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1431) (512y:  212) (512z: 2059)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inlL_hrd0.txt
new file mode 100644
index 0000000000..165d4134f5
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inlL_hrd0.txt
@@ -0,0 +1,229 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 27s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+
+DATE: 2024-08-30_00:41:16
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inlL_hrd0/check_cuda.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 2.696639e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.125840e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.255586e+07                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     0.550164 sec
+INFO: No Floating Point Exceptions have been reported
+     2,274,143,342      cycles                           #    2.865 GHz                    
+     3,343,034,761      instructions                     #    1.47  insn per cycle         
+       0.850551745 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inlL_hrd0/check_cuda.exe -p 2048 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 190
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 2.028807e+00
+Avg ME (F77/GPU)   = 2.0288063423243874
+Relative difference = 3.241686432649386e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.801992e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.847672e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.847672e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     5.925140 sec
+INFO: No Floating Point Exceptions have been reported
+    17,354,450,078      cycles                           #    2.927 GHz                    
+    46,306,499,591      instructions                     #    2.67  insn per cycle         
+       5.930817571 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  272) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028807e+00
+Avg ME (F77/C++)    = 2.0288063903750300
+Relative difference = 3.0048445715164216e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.128302e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.278597e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.278597e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     3.459445 sec
+INFO: No Floating Point Exceptions have been reported
+    10,137,039,547      cycles                           #    2.927 GHz                    
+    27,907,316,300      instructions                     #    2.75  insn per cycle         
+       3.465119305 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  916) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028807e+00
+Avg ME (F77/C++)    = 2.0288063903750300
+Relative difference = 3.0048445715164216e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 5.012441e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.394916e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.394916e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     2.198734 sec
+INFO: No Floating Point Exceptions have been reported
+     6,071,213,011      cycles                           #    2.755 GHz                    
+    12,491,073,353      instructions                     #    2.06  insn per cycle         
+       2.204354625 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1690) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028807e+00
+Avg ME (F77/C++)    = 2.0288064057068964
+Relative difference = 2.9292737240031234e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 5.508580e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.963715e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.963715e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     2.009851 sec
+INFO: No Floating Point Exceptions have been reported
+     5,545,482,765      cycles                           #    2.753 GHz                    
+    11,970,235,243      instructions                     #    2.16  insn per cycle         
+       2.015344652 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1470) (512y:  146) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028807e+00
+Avg ME (F77/C++)    = 2.0288064057068964
+Relative difference = 2.9292737240031234e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inlL_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.549331e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.734436e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.734436e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
+TOTAL       :     3.060211 sec
+INFO: No Floating Point Exceptions have been reported
+     5,636,195,026      cycles                           #    1.839 GHz                    
+     8,246,445,178      instructions                     #    1.46  insn per cycle         
+       3.065868647 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1605) (512y:  126) (512z:  912)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 2.028807e+00
+Avg ME (F77/C++)    = 2.0288064057068964
+Relative difference = 2.9292737240031234e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
index f4571b9f6b..07a9da8cf2 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 05s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-08_19:53:03
+DATE: 2024-08-29_22:50:44
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.927019e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.050993e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.064681e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.768055e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.041158e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.054837e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     0.466363 sec
+TOTAL       :     0.468421 sec
 INFO: No Floating Point Exceptions have been reported
-     2,031,704,885      cycles                           #    2.932 GHz                    
-     2,907,931,480      instructions                     #    1.43  insn per cycle         
-       0.749954927 seconds time elapsed
+     1,983,649,354      cycles                           #    2.860 GHz                    
+     2,849,635,281      instructions                     #    1.44  insn per cycle         
+       0.752061218 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.108955e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.322519e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.334742e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.093113e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.321988e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.335513e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
-TOTAL       :     0.601379 sec
+TOTAL       :     0.607399 sec
 INFO: No Floating Point Exceptions have been reported
-     2,455,141,462      cycles                           #    2.938 GHz                    
-     3,762,396,340      instructions                     #    1.53  insn per cycle         
-       0.893863333 seconds time elapsed
+     2,417,979,870      cycles                           #    2.867 GHz                    
+     3,633,633,356      instructions                     #    1.50  insn per cycle         
+       0.902950603 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.481232e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.493616e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.493616e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.416355e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.428534e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.428534e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     6.623962 sec
+TOTAL       :     6.801356 sec
 INFO: No Floating Point Exceptions have been reported
-    19,900,544,736      cycles                           #    3.003 GHz                    
-    59,917,689,995      instructions                     #    3.01  insn per cycle         
-       6.628146634 seconds time elapsed
+    19,904,115,348      cycles                           #    2.925 GHz                    
+    59,914,819,169      instructions                     #    3.01  insn per cycle         
+       6.805500502 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.692821e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.734716e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.734716e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.569173e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.611795e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.611795e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     3.511000 sec
+TOTAL       :     3.605969 sec
 INFO: No Floating Point Exceptions have been reported
-    10,573,188,323      cycles                           #    3.009 GHz                    
-    31,088,228,992      instructions                     #    2.94  insn per cycle         
-       3.514850116 seconds time elapsed
+    10,580,697,478      cycles                           #    2.932 GHz                    
+    31,088,445,704      instructions                     #    2.94  insn per cycle         
+       3.610227674 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.311594e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.480158e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.480158e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.052369e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.218380e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.218380e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.779751 sec
+TOTAL       :     1.830002 sec
 INFO: No Floating Point Exceptions have been reported
-     4,993,361,094      cycles                           #    2.801 GHz                    
-    11,406,864,540      instructions                     #    2.28  insn per cycle         
-       1.783592873 seconds time elapsed
+     5,001,433,513      cycles                           #    2.728 GHz                    
+    11,404,945,118      instructions                     #    2.28  insn per cycle         
+       1.834254964 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4635) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.047569e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.068559e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.068559e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.021999e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.042648e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.042648e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.583863 sec
+TOTAL       :     1.623312 sec
 INFO: No Floating Point Exceptions have been reported
-     4,443,684,141      cycles                           #    2.800 GHz                    
-    10,665,267,804      instructions                     #    2.40  insn per cycle         
-       1.587769074 seconds time elapsed
+     4,444,423,908      cycles                           #    2.732 GHz                    
+    10,663,668,834      instructions                     #    2.40  insn per cycle         
+       1.627519035 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4371) (512y:   91) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.461711e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.569260e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.569260e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.029000e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.130556e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.130556e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     2.218169 sec
+TOTAL       :     2.353437 sec
 INFO: No Floating Point Exceptions have been reported
-     4,131,467,216      cycles                           #    1.860 GHz                    
-     5,968,009,062      instructions                     #    1.44  insn per cycle         
-       2.222079730 seconds time elapsed
+     4,139,182,535      cycles                           #    1.756 GHz                    
+     5,966,452,479      instructions                     #    1.44  insn per cycle         
+       2.357699578 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1605) (512y:   95) (512z: 3576)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt
index a42937504e..3b4a073e1f 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 14s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-08_20:18:23
+DATE: 2024-08-29_23:28:57
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.687469e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.986061e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.986061e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.524968e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.790522e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.790522e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     0.493096 sec
+TOTAL       :     0.498207 sec
 INFO: No Floating Point Exceptions have been reported
-     2,045,059,008      cycles                           #    2.898 GHz                    
-     3,097,048,003      instructions                     #    1.51  insn per cycle         
-       0.762660564 seconds time elapsed
+     2,034,885,127      cycles                           #    2.867 GHz                    
+     3,099,307,889      instructions                     #    1.52  insn per cycle         
+       0.766751153 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -79,15 +83,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.805866e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.910227e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.910227e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.638749e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.629765e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.629765e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
-TOTAL       :     0.818307 sec
+TOTAL       :     0.835698 sec
 INFO: No Floating Point Exceptions have been reported
-     3,140,684,454      cycles                           #    2.950 GHz                    
-     5,061,508,169      instructions                     #    1.61  insn per cycle         
-       1.128278285 seconds time elapsed
+     3,068,002,737      cycles                           #    2.826 GHz                    
+     4,965,731,705      instructions                     #    1.62  insn per cycle         
+       1.143737678 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -110,15 +114,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.492873e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.505187e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.505187e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.419094e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.431319e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.431319e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     6.599351 sec
+TOTAL       :     6.801659 sec
 INFO: No Floating Point Exceptions have been reported
-    19,933,005,895      cycles                           #    3.019 GHz                    
-    59,920,307,427      instructions                     #    3.01  insn per cycle         
-       6.603770814 seconds time elapsed
+    19,950,430,190      cycles                           #    2.932 GHz                    
+    59,924,799,433      instructions                     #    3.00  insn per cycle         
+       6.806231180 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.695185e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.737821e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.737821e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.563840e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.607229e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.607229e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     3.515055 sec
+TOTAL       :     3.618278 sec
 INFO: No Floating Point Exceptions have been reported
-    10,602,064,942      cycles                           #    3.013 GHz                    
-    31,134,275,582      instructions                     #    2.94  insn per cycle         
-       3.519385575 seconds time elapsed
+    10,621,531,461      cycles                           #    2.933 GHz                    
+    31,136,479,712      instructions                     #    2.93  insn per cycle         
+       3.622674421 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -170,15 +174,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.301392e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.470755e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.470755e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.968662e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.136792e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.136792e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.788543 sec
+TOTAL       :     1.855111 sec
 INFO: No Floating Point Exceptions have been reported
-     5,028,204,629      cycles                           #    2.805 GHz                    
-    11,455,559,201      instructions                     #    2.28  insn per cycle         
-       1.792981978 seconds time elapsed
+     5,050,268,142      cycles                           #    2.717 GHz                    
+    11,453,960,155      instructions                     #    2.27  insn per cycle         
+       1.859579114 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4635) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.050919e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.072418e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.072418e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.010354e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.031565e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.031565e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.585500 sec
+TOTAL       :     1.650652 sec
 INFO: No Floating Point Exceptions have been reported
-     4,477,945,053      cycles                           #    2.818 GHz                    
-    10,713,475,732      instructions                     #    2.39  insn per cycle         
-       1.589826674 seconds time elapsed
+     4,494,300,346      cycles                           #    2.716 GHz                    
+    10,714,275,385      instructions                     #    2.38  insn per cycle         
+       1.655223138 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4371) (512y:   91) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -230,15 +234,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.347709e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.453074e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.453074e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.026438e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.131749e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.131749e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     2.257984 sec
+TOTAL       :     2.362018 sec
 INFO: No Floating Point Exceptions have been reported
-     4,161,878,306      cycles                           #    1.840 GHz                    
-     6,004,301,884      instructions                     #    1.44  insn per cycle         
-       2.262398569 seconds time elapsed
+     4,180,671,804      cycles                           #    1.768 GHz                    
+     6,004,633,668      instructions                     #    1.44  insn per cycle         
+       2.366452325 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1605) (512y:   95) (512z: 3576)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt
index 6efe0f69f4..ec909fdebc 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 50s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-08_19:53:28
+DATE: 2024-08-29_22:51:10
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.841089e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.040503e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.053751e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.808286e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.043420e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.056801e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     0.462910 sec
+TOTAL       :     0.468123 sec
 INFO: No Floating Point Exceptions have been reported
-     2,010,149,699      cycles                           #    2.952 GHz                    
-     2,896,854,048      instructions                     #    1.44  insn per cycle         
-       0.738052118 seconds time elapsed
+     1,982,122,406      cycles                           #    2.866 GHz                    
+     2,840,505,864      instructions                     #    1.43  insn per cycle         
+       0.749220422 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.107639e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.318401e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.329750e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.087347e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.314577e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.327755e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
-TOTAL       :     0.598813 sec
+TOTAL       :     0.599896 sec
 INFO: No Floating Point Exceptions have been reported
-     2,457,830,026      cycles                           #    2.951 GHz                    
-     3,751,049,656      instructions                     #    1.53  insn per cycle         
-       0.893099521 seconds time elapsed
+     2,403,528,526      cycles                           #    2.872 GHz                    
+     3,686,433,169      instructions                     #    1.53  insn per cycle         
+       0.895610206 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.489979e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.502462e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.502462e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.407480e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.419549e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.419549e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     6.600482 sec
+TOTAL       :     6.825988 sec
 INFO: No Floating Point Exceptions have been reported
-    19,968,279,527      cycles                           #    3.024 GHz                    
-    60,133,262,996      instructions                     #    3.01  insn per cycle         
-       6.604278291 seconds time elapsed
+    19,902,263,810      cycles                           #    2.914 GHz                    
+    60,127,973,180      instructions                     #    3.02  insn per cycle         
+       6.830146319 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.723867e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.766716e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.766716e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.552176e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.594726e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.594726e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     3.487862 sec
+TOTAL       :     3.618973 sec
 INFO: No Floating Point Exceptions have been reported
-    10,481,040,414      cycles                           #    3.003 GHz                    
-    30,690,087,380      instructions                     #    2.93  insn per cycle         
-       3.491637208 seconds time elapsed
+    10,491,172,393      cycles                           #    2.896 GHz                    
+    30,689,021,169      instructions                     #    2.93  insn per cycle         
+       3.623315110 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.840811e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.994004e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.994004e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.798771e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.955422e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.955422e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.873663 sec
+TOTAL       :     1.882242 sec
 INFO: No Floating Point Exceptions have been reported
-     5,129,466,442      cycles                           #    2.733 GHz                    
-    11,839,868,923      instructions                     #    2.31  insn per cycle         
-       1.877504725 seconds time elapsed
+     5,138,339,484      cycles                           #    2.725 GHz                    
+    11,838,435,308      instructions                     #    2.30  insn per cycle         
+       1.886500452 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4741) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.982969e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.017062e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.017062e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.596264e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.781452e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.781452e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.660972 sec
+TOTAL       :     1.727060 sec
 INFO: No Floating Point Exceptions have been reported
-     4,713,444,499      cycles                           #    2.833 GHz                    
-    11,164,953,266      instructions                     #    2.37  insn per cycle         
-       1.664821518 seconds time elapsed
+     4,719,460,247      cycles                           #    2.727 GHz                    
+    11,163,955,032      instructions                     #    2.37  insn per cycle         
+       1.731396360 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4396) (512y:  245) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.457192e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.563104e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.563104e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.994756e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.093121e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.093121e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     2.218804 sec
+TOTAL       :     2.364545 sec
 INFO: No Floating Point Exceptions have been reported
-     4,152,440,872      cycles                           #    1.869 GHz                    
-     6,219,243,593      instructions                     #    1.50  insn per cycle         
-       2.222530673 seconds time elapsed
+     4,159,347,839      cycles                           #    1.757 GHz                    
+     6,218,332,377      instructions                     #    1.50  insn per cycle         
+       2.368956290 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1501) (512y:  140) (512z: 3678)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inlL_hrd0.txt
new file mode 100644
index 0000000000..43c2aad0df
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 38s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+DATE: 2024-08-30_00:41:41
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 3.791187e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.081614e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.104189e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     0.496328 sec
+INFO: No Floating Point Exceptions have been reported
+     2,036,036,052      cycles                           #    2.864 GHz                    
+     3,038,457,562      instructions                     #    1.49  insn per cycle         
+       0.769480205 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 64 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 196
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 4.304219e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.621410e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.637847e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
+TOTAL       :     0.740131 sec
+INFO: No Floating Point Exceptions have been reported
+     2,813,975,805      cycles                           #    2.874 GHz                    
+     4,712,824,125      instructions                     #    1.67  insn per cycle         
+       1.038573173 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 1.413122e+00
+Avg ME (F77/GPU)   = 1.4131213684418646
+Relative difference = 4.4692399902091566e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 2.399032e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.410878e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.410878e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     6.850554 sec
+INFO: No Floating Point Exceptions have been reported
+    20,037,115,356      cycles                           #    2.924 GHz                    
+    60,595,565,401      instructions                     #    3.02  insn per cycle         
+       6.854767446 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  591) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413122e+00
+Avg ME (F77/C++)    = 1.4131213684432418
+Relative difference = 4.469230244973857e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 4.501882e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.543401e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.543401e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     3.659601 sec
+INFO: No Floating Point Exceptions have been reported
+    10,720,660,231      cycles                           #    2.927 GHz                    
+    31,655,946,244      instructions                     #    2.95  insn per cycle         
+       3.663845747 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1823) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413122e+00
+Avg ME (F77/C++)    = 1.4131213684432418
+Relative difference = 4.469230244973857e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 8.895284e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.057540e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.057540e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     1.862541 sec
+INFO: No Floating Point Exceptions have been reported
+     5,055,557,091      cycles                           #    2.709 GHz                    
+    11,447,758,830      instructions                     #    2.26  insn per cycle         
+       1.866857319 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2321) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413122e+00
+Avg ME (F77/C++)    = 1.4131213684418098
+Relative difference = 4.469240378321559e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.009826e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.030544e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.030544e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     1.642771 sec
+INFO: No Floating Point Exceptions have been reported
+     4,486,581,844      cycles                           #    2.725 GHz                    
+    10,750,219,832      instructions                     #    2.40  insn per cycle         
+       1.647081094 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2129) (512y:   91) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413122e+00
+Avg ME (F77/C++)    = 1.4131213684418098
+Relative difference = 4.469240378321559e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 7.046356e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.145774e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.145774e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     2.347548 sec
+INFO: No Floating Point Exceptions have been reported
+     4,153,123,075      cycles                           #    1.767 GHz                    
+     6,053,102,759      instructions                     #    1.46  insn per cycle         
+       2.351953172 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1562) (512y:   95) (512z: 1432)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413122e+00
+Avg ME (F77/C++)    = 1.4131213684418098
+Relative difference = 4.469240378321559e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
index f6f4702d8b..730624ea47 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 56s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-08_19:53:53
+DATE: 2024-08-29_22:51:35
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.320062e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.967518e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.041410e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.278215e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.944384e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.016796e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008472e+02 +- 5.002447e+01 )  GeV^-2
-TOTAL       :     0.444288 sec
+TOTAL       :     0.451173 sec
 INFO: No Floating Point Exceptions have been reported
-     1,959,595,734      cycles                           #    2.963 GHz                    
-     2,777,994,587      instructions                     #    1.42  insn per cycle         
-       0.717899732 seconds time elapsed
+     1,919,633,470      cycles                           #    2.857 GHz                    
+     2,728,963,073      instructions                     #    1.42  insn per cycle         
+       0.729048793 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 227
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.069470e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.919373e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.975617e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.979292e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.903991e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.964886e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.630097e+02 +- 4.770717e+02 )  GeV^-2
-TOTAL       :     0.495533 sec
+TOTAL       :     0.499205 sec
 INFO: No Floating Point Exceptions have been reported
-     2,156,454,732      cycles                           #    2.941 GHz                    
-     3,086,518,049      instructions                     #    1.43  insn per cycle         
-       0.790560540 seconds time elapsed
+     2,102,156,543      cycles                           #    2.874 GHz                    
+     3,076,367,109      instructions                     #    1.46  insn per cycle         
+       0.788612885 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.572191e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.585337e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.585337e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.509899e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.522866e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.522866e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     6.388092 sec
+TOTAL       :     6.546451 sec
 INFO: No Floating Point Exceptions have been reported
-    19,202,614,309      cycles                           #    3.005 GHz                    
-    59,612,894,743      instructions                     #    3.10  insn per cycle         
-       6.392159520 seconds time elapsed
+    19,203,967,265      cycles                           #    2.932 GHz                    
+    59,613,462,994      instructions                     #    3.10  insn per cycle         
+       6.550613990 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  959) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.292655e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.433094e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.433094e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.007790e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.143555e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.143555e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     1.992839 sec
+TOTAL       :     2.063842 sec
 INFO: No Floating Point Exceptions have been reported
-     6,013,924,550      cycles                           #    3.013 GHz                    
-    17,061,326,868      instructions                     #    2.84  insn per cycle         
-       1.996457314 seconds time elapsed
+     6,028,053,387      cycles                           #    2.916 GHz                    
+    17,061,568,102      instructions                     #    2.83  insn per cycle         
+       2.067953284 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5855) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.800495e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.863232e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.863232e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.739489e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.800542e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.800542e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     0.927310 sec
+TOTAL       :     0.959970 sec
 INFO: No Floating Point Exceptions have been reported
-     2,629,891,219      cycles                           #    2.827 GHz                    
-     6,187,073,232      instructions                     #    2.35  insn per cycle         
-       0.930846209 seconds time elapsed
+     2,633,014,395      cycles                           #    2.733 GHz                    
+     6,186,422,079      instructions                     #    2.35  insn per cycle         
+       0.964103808 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5091) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.976191e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.051455e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.051455e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.914942e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.989247e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.989247e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     0.846370 sec
+TOTAL       :     0.873915 sec
 INFO: No Floating Point Exceptions have been reported
-     2,395,634,403      cycles                           #    2.821 GHz                    
-     5,790,356,055      instructions                     #    2.42  insn per cycle         
-       0.849905167 seconds time elapsed
+     2,398,703,934      cycles                           #    2.734 GHz                    
+     5,790,632,910      instructions                     #    2.41  insn per cycle         
+       0.878146083 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4896) (512y:   36) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.518605e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.563959e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.563959e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.440202e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.483048e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.483048e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008856e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     1.098394 sec
+TOTAL       :     1.157768 sec
 INFO: No Floating Point Exceptions have been reported
-     2,076,123,552      cycles                           #    1.885 GHz                    
-     3,391,311,970      instructions                     #    1.63  insn per cycle         
-       1.102116086 seconds time elapsed
+     2,073,923,596      cycles                           #    1.786 GHz                    
+     3,391,319,853      instructions                     #    1.64  insn per cycle         
+       1.162015056 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2214) (512y:   39) (512z: 3787)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt
index 38bf1cd9c0..d8e5b06899 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 02s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-08_20:18:48
+DATE: 2024-08-29_23:29:23
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.003824e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.049696e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.049696e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.767858e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.009815e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.009815e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009071e+02 +- 5.002295e+01 )  GeV^-2
-TOTAL       :     0.462593 sec
+TOTAL       :     0.464758 sec
 INFO: No Floating Point Exceptions have been reported
-     1,974,680,886      cycles                           #    2.933 GHz                    
-     2,925,643,074      instructions                     #    1.48  insn per cycle         
-       0.731432096 seconds time elapsed
+     1,927,287,410      cycles                           #    2.859 GHz                    
+     2,875,384,287      instructions                     #    1.49  insn per cycle         
+       0.732402984 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -79,15 +83,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.700147e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.536036e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.536036e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.583680e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.445503e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.445503e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.737499e+02 +- 4.776369e+02 )  GeV^-2
-TOTAL       :     0.641753 sec
+TOTAL       :     0.650296 sec
 INFO: No Floating Point Exceptions have been reported
-     2,565,792,794      cycles                           #    2.944 GHz                    
-     3,938,395,338      instructions                     #    1.53  insn per cycle         
-       0.930086671 seconds time elapsed
+     2,502,497,718      cycles                           #    2.846 GHz                    
+     3,902,267,322      instructions                     #    1.56  insn per cycle         
+       0.938009872 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -110,15 +114,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.551720e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.564557e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.564557e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.507798e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.520716e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.520716e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     6.442209 sec
+TOTAL       :     6.556234 sec
 INFO: No Floating Point Exceptions have been reported
-    19,332,196,535      cycles                           #    2.999 GHz                    
-    59,617,412,156      instructions                     #    3.08  insn per cycle         
-       6.446330406 seconds time elapsed
+    19,237,442,122      cycles                           #    2.933 GHz                    
+    59,617,865,061      instructions                     #    3.10  insn per cycle         
+       6.560395287 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  959) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.229338e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.368673e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.368673e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.856144e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.991623e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.991623e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     2.012620 sec
+TOTAL       :     2.108789 sec
 INFO: No Floating Point Exceptions have been reported
-     6,036,126,177      cycles                           #    2.994 GHz                    
-    17,109,389,715      instructions                     #    2.83  insn per cycle         
-       2.016763535 seconds time elapsed
+     6,150,269,807      cycles                           #    2.912 GHz                    
+    17,110,455,897      instructions                     #    2.78  insn per cycle         
+       2.113131923 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5855) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -170,15 +174,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.740859e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.806079e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.806079e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.735458e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.798185e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.798185e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     0.964100 sec
+TOTAL       :     0.966917 sec
 INFO: No Floating Point Exceptions have been reported
-     2,661,000,573      cycles                           #    2.750 GHz                    
-     6,223,355,528      instructions                     #    2.34  insn per cycle         
-       0.968303872 seconds time elapsed
+     2,657,777,061      cycles                           #    2.739 GHz                    
+     6,223,729,758      instructions                     #    2.34  insn per cycle         
+       0.971167979 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5091) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.800266e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.868707e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.868707e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.907068e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.983078e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.983078e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     0.933168 sec
+TOTAL       :     0.881736 sec
 INFO: No Floating Point Exceptions have been reported
-     2,423,820,124      cycles                           #    2.587 GHz                    
-     5,827,757,074      instructions                     #    2.40  insn per cycle         
-       0.937581508 seconds time elapsed
+     2,422,460,405      cycles                           #    2.736 GHz                    
+     5,827,345,262      instructions                     #    2.41  insn per cycle         
+       0.886046409 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4896) (512y:   36) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -230,15 +234,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.427750e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.470264e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.470264e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.431727e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.475110e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.475110e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008856e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     1.172250 sec
+TOTAL       :     1.169788 sec
 INFO: No Floating Point Exceptions have been reported
-     2,098,127,039      cycles                           #    1.785 GHz                    
-     3,432,639,908      instructions                     #    1.64  insn per cycle         
-       1.176441537 seconds time elapsed
+     2,100,278,925      cycles                           #    1.790 GHz                    
+     3,432,954,471      instructions                     #    1.63  insn per cycle         
+       1.174173020 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2214) (512y:   39) (512z: 3787)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt
index 0ba4eb9609..ae36751a1d 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 49s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-08_19:54:14
+DATE: 2024-08-29_22:51:56
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.278251e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.942254e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.021816e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.277743e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.945397e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.020092e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008472e+02 +- 5.002447e+01 )  GeV^-2
-TOTAL       :     0.446242 sec
+TOTAL       :     0.449999 sec
 INFO: No Floating Point Exceptions have been reported
-     1,972,500,118      cycles                           #    2.943 GHz                    
-     2,795,935,059      instructions                     #    1.42  insn per cycle         
-       0.726942838 seconds time elapsed
+     1,921,589,382      cycles                           #    2.855 GHz                    
+     2,723,588,998      instructions                     #    1.42  insn per cycle         
+       0.730462057 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 221
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.087674e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.947916e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.002420e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.994291e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.944236e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.006487e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.630097e+02 +- 4.770717e+02 )  GeV^-2
-TOTAL       :     0.494089 sec
+TOTAL       :     0.501120 sec
 INFO: No Floating Point Exceptions have been reported
-     2,134,934,271      cycles                           #    2.953 GHz                    
-     3,048,352,562      instructions                     #    1.43  insn per cycle         
-       0.779729616 seconds time elapsed
+     2,082,733,552      cycles                           #    2.864 GHz                    
+     3,004,954,151      instructions                     #    1.44  insn per cycle         
+       0.784749526 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.547958e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.560826e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.560826e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.482604e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.495412e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.495412e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     6.448288 sec
+TOTAL       :     6.618213 sec
 INFO: No Floating Point Exceptions have been reported
-    19,391,308,595      cycles                           #    3.006 GHz                    
-    59,353,270,013      instructions                     #    3.06  insn per cycle         
-       6.452193679 seconds time elapsed
+    19,382,805,943      cycles                           #    2.927 GHz                    
+    59,350,891,670      instructions                     #    3.06  insn per cycle         
+       6.622341307 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.669188e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.820622e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.820622e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.395581e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.546831e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.546831e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     1.907127 sec
+TOTAL       :     1.969338 sec
 INFO: No Floating Point Exceptions have been reported
-     5,746,722,793      cycles                           #    3.009 GHz                    
-    16,850,100,573      instructions                     #    2.93  insn per cycle         
-       1.910695363 seconds time elapsed
+     5,753,637,450      cycles                           #    2.917 GHz                    
+    16,850,481,816      instructions                     #    2.93  insn per cycle         
+       1.973469763 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5610) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.563334e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.611066e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.611066e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.511813e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.557982e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.557982e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     1.065485 sec
+TOTAL       :     1.101839 sec
 INFO: No Floating Point Exceptions have been reported
-     3,007,335,634      cycles                           #    2.814 GHz                    
-     6,847,154,679      instructions                     #    2.28  insn per cycle         
-       1.069270257 seconds time elapsed
+     3,013,272,940      cycles                           #    2.726 GHz                    
+     6,847,941,445      instructions                     #    2.27  insn per cycle         
+       1.106014161 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5721) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.689887e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.745378e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.745378e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.628019e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.682162e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.682162e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     0.986999 sec
+TOTAL       :     1.024330 sec
 INFO: No Floating Point Exceptions have been reported
-     2,801,128,869      cycles                           #    2.830 GHz                    
-     6,436,964,591      instructions                     #    2.30  insn per cycle         
-       0.990525270 seconds time elapsed
+     2,804,905,619      cycles                           #    2.729 GHz                    
+     6,436,635,506      instructions                     #    2.29  insn per cycle         
+       1.028506828 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5497) (512y:   22) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.390544e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.428498e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.428498e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.315584e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.351667e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.351667e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008856e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     1.197863 sec
+TOTAL       :     1.265651 sec
 INFO: No Floating Point Exceptions have been reported
-     2,249,856,205      cycles                           #    1.874 GHz                    
-     3,755,019,516      instructions                     #    1.67  insn per cycle         
-       1.201521180 seconds time elapsed
+     2,253,548,105      cycles                           #    1.776 GHz                    
+     3,754,786,325      instructions                     #    1.67  insn per cycle         
+       1.269807096 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2445) (512y:   29) (512z: 4082)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inlL_hrd0.txt
new file mode 100644
index 0000000000..8e08f033e6
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 27s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+DATE: 2024-08-30_00:42:07
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inlL_hrd0/check_cuda.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 6.506816e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.984025e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.028192e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008472e+02 +- 5.002447e+01 )  GeV^-2
+TOTAL       :     0.469962 sec
+INFO: No Floating Point Exceptions have been reported
+     1,980,782,067      cycles                           #    2.871 GHz                    
+     2,880,947,643      instructions                     #    1.45  insn per cycle         
+       0.747299792 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inlL_hrd0/check_cuda.exe -p 64 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 136
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inlL_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 7.776577e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.305815e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.333384e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 6.630097e+02 +- 4.770717e+02 )  GeV^-2
+TOTAL       :     0.605092 sec
+INFO: No Floating Point Exceptions have been reported
+     2,376,867,766      cycles                           #    2.860 GHz                    
+     3,767,782,391      instructions                     #    1.59  insn per cycle         
+       0.888902694 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 1.412607e+00
+Avg ME (F77/GPU)   = 1.4132214900253359
+Relative difference = 0.00043500423354547914
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 2.422717e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.435000e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.435000e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.009197e+02 +- 5.002588e+01 )  GeV^-2
+TOTAL       :     6.781890 sec
+INFO: No Floating Point Exceptions have been reported
+    19,830,062,733      cycles                           #    2.923 GHz                    
+    61,537,037,402      instructions                     #    3.10  insn per cycle         
+       6.785970800 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  437) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413035e+00
+Avg ME (F77/C++)    = 1.4130352595211662
+Relative difference = 1.8366223491471866e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 7.955854e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.090414e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.090414e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.009198e+02 +- 5.002590e+01 )  GeV^-2
+TOTAL       :     2.077691 sec
+INFO: No Floating Point Exceptions have been reported
+     6,053,084,055      cycles                           #    2.909 GHz                    
+    17,295,970,516      instructions                     #    2.86  insn per cycle         
+       2.081896556 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2379) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413036e+00
+Avg ME (F77/C++)    = 1.4130355637321941
+Relative difference = 3.0874500428943766e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.729150e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.789877e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.789877e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008908e+02 +- 5.002900e+01 )  GeV^-2
+TOTAL       :     0.965535 sec
+INFO: No Floating Point Exceptions have been reported
+     2,647,590,764      cycles                           #    2.732 GHz                    
+     6,210,971,464      instructions                     #    2.35  insn per cycle         
+       0.969659928 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2731) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413194e+00
+Avg ME (F77/C++)    = 1.4131936445722886
+Relative difference = 2.515066660945201e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.908613e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.981952e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.981952e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008908e+02 +- 5.002900e+01 )  GeV^-2
+TOTAL       :     0.876436 sec
+INFO: No Floating Point Exceptions have been reported
+     2,415,108,209      cycles                           #    2.745 GHz                    
+     5,856,825,624      instructions                     #    2.43  insn per cycle         
+       0.880519857 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2602) (512y:   36) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413194e+00
+Avg ME (F77/C++)    = 1.4131936445722886
+Relative difference = 2.515066660945201e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.429829e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.472097e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.472097e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008908e+02 +- 5.002902e+01 )  GeV^-2
+TOTAL       :     1.166299 sec
+INFO: No Floating Point Exceptions have been reported
+     2,093,557,115      cycles                           #    1.790 GHz                    
+     3,450,773,482      instructions                     #    1.65  insn per cycle         
+       1.170506427 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2166) (512y:   39) (512z: 1606)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413194e+00
+Avg ME (F77/C++)    = 1.4131938773954573
+Relative difference = 8.675705015764985e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
index b56fab2636..16d585d797 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 56s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-08_19:54:34
+DATE: 2024-08-29_22:52:17
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.873225e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.048994e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.062769e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.764002e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.041698e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.055474e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     0.468393 sec
+TOTAL       :     0.471629 sec
 INFO: No Floating Point Exceptions have been reported
-     2,013,463,276      cycles                           #    2.926 GHz                    
-     2,843,704,920      instructions                     #    1.41  insn per cycle         
-       0.746969806 seconds time elapsed
+     1,944,394,459      cycles                           #    2.835 GHz                    
+     2,789,943,135      instructions                     #    1.43  insn per cycle         
+       0.744922955 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.105683e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.317981e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.329407e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.090832e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.317008e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.330502e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
-TOTAL       :     0.602858 sec
+TOTAL       :     0.609477 sec
 INFO: No Floating Point Exceptions have been reported
-     2,481,502,789      cycles                           #    2.952 GHz                    
-     3,777,860,843      instructions                     #    1.52  insn per cycle         
-       0.899194246 seconds time elapsed
+     2,404,259,561      cycles                           #    2.844 GHz                    
+     3,644,904,805      instructions                     #    1.52  insn per cycle         
+       0.905916250 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.428536e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.440162e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.440162e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.377685e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.389366e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.389366e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     6.766520 sec
+TOTAL       :     6.911680 sec
 INFO: No Floating Point Exceptions have been reported
-    20,196,006,274      cycles                           #    2.983 GHz                    
-    60,947,190,146      instructions                     #    3.02  insn per cycle         
-       6.770695543 seconds time elapsed
+    20,218,013,208      cycles                           #    2.924 GHz                    
+    60,950,520,452      instructions                     #    3.01  insn per cycle         
+       6.915909091 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.786932e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.830680e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.830680e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.609877e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.653027e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.653027e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     3.442084 sec
+TOTAL       :     3.574180 sec
 INFO: No Floating Point Exceptions have been reported
-    10,443,979,206      cycles                           #    3.032 GHz                    
-    30,824,270,405      instructions                     #    2.95  insn per cycle         
-       3.445851321 seconds time elapsed
+    10,450,566,835      cycles                           #    2.921 GHz                    
+    30,821,772,550      instructions                     #    2.95  insn per cycle         
+       3.578458596 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5350) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.470779e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.644870e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.644870e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.114697e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.284002e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.284002e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.749981 sec
+TOTAL       :     1.817728 sec
 INFO: No Floating Point Exceptions have been reported
-     4,950,819,939      cycles                           #    2.824 GHz                    
-    11,360,637,335      instructions                     #    2.29  insn per cycle         
-       1.753761622 seconds time elapsed
+     4,954,203,602      cycles                           #    2.720 GHz                    
+    11,359,039,929      instructions                     #    2.29  insn per cycle         
+       1.821986435 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4764) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.072349e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.094125e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.094125e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.033408e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.054510e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.054510e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.547382 sec
+TOTAL       :     1.605536 sec
 INFO: No Floating Point Exceptions have been reported
-     4,393,258,157      cycles                           #    2.833 GHz                    
-    10,610,345,317      instructions                     #    2.42  insn per cycle         
-       1.551099869 seconds time elapsed
+     4,392,902,582      cycles                           #    2.730 GHz                    
+    10,608,545,501      instructions                     #    2.41  insn per cycle         
+       1.609849025 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4491) (512y:   83) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.179185e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.278821e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.278821e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.852292e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.948447e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.948447e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     2.303939 sec
+TOTAL       :     2.413629 sec
 INFO: No Floating Point Exceptions have been reported
-     4,243,069,453      cycles                           #    1.839 GHz                    
-     6,166,943,639      instructions                     #    1.45  insn per cycle         
-       2.307918272 seconds time elapsed
+     4,253,955,957      cycles                           #    1.760 GHz                    
+     6,166,525,936      instructions                     #    1.45  insn per cycle         
+       2.418081364 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2117) (512y:  117) (512z: 3652)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt
index 02b75df755..fa2257a7a9 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 50s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-08_19:54:59
+DATE: 2024-08-29_22:52:43
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.792781e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.038946e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.052598e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.722618e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.034655e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.047991e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     0.468036 sec
+TOTAL       :     0.466796 sec
 INFO: No Floating Point Exceptions have been reported
-     1,985,001,604      cycles                           #    2.907 GHz                    
-     2,766,137,748      instructions                     #    1.39  insn per cycle         
-       0.741175013 seconds time elapsed
+     1,953,188,947      cycles                           #    2.854 GHz                    
+     2,820,575,421      instructions                     #    1.44  insn per cycle         
+       0.741421075 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.100333e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.310665e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.321752e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.084249e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.309437e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.322993e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
-TOTAL       :     0.598767 sec
+TOTAL       :     0.605761 sec
 INFO: No Floating Point Exceptions have been reported
-     2,453,028,425      cycles                           #    2.950 GHz                    
-     3,661,775,107      instructions                     #    1.49  insn per cycle         
-       0.892773102 seconds time elapsed
+     2,416,308,048      cycles                           #    2.845 GHz                    
+     3,608,210,606      instructions                     #    1.49  insn per cycle         
+       0.910078060 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.443765e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.455326e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.455326e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.377305e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.389054e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.389054e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     6.725300 sec
+TOTAL       :     6.912722 sec
 INFO: No Floating Point Exceptions have been reported
-    20,276,202,254      cycles                           #    3.014 GHz                    
-    61,176,047,563      instructions                     #    3.02  insn per cycle         
-       6.729394202 seconds time elapsed
+    20,223,820,603      cycles                           #    2.924 GHz                    
+    61,171,685,276      instructions                     #    3.02  insn per cycle         
+       6.916884719 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.782126e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.826623e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.826623e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.615705e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.659353e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.659353e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     3.445491 sec
+TOTAL       :     3.569504 sec
 INFO: No Floating Point Exceptions have been reported
-    10,362,676,163      cycles                           #    3.005 GHz                    
-    30,536,337,790      instructions                     #    2.95  insn per cycle         
-       3.449270850 seconds time elapsed
+    10,370,296,215      cycles                           #    2.902 GHz                    
+    30,533,837,695      instructions                     #    2.94  insn per cycle         
+       3.573867995 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5154) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.061590e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.221412e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.221412e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.737237e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.894376e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.894376e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.828348 sec
+TOTAL       :     1.895286 sec
 INFO: No Floating Point Exceptions have been reported
-     5,140,078,208      cycles                           #    2.807 GHz                    
-    11,874,984,280      instructions                     #    2.31  insn per cycle         
-       1.832218653 seconds time elapsed
+     5,142,071,518      cycles                           #    2.708 GHz                    
+    11,872,872,158      instructions                     #    2.31  insn per cycle         
+       1.899520112 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4875) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.004120e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.023004e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.023004e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.694774e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.881575e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.881575e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.651331 sec
+TOTAL       :     1.709775 sec
 INFO: No Floating Point Exceptions have been reported
-     4,668,851,118      cycles                           #    2.822 GHz                    
-    11,168,266,795      instructions                     #    2.39  insn per cycle         
-       1.655171295 seconds time elapsed
+     4,671,739,679      cycles                           #    2.727 GHz                    
+    11,166,170,928      instructions                     #    2.39  insn per cycle         
+       1.713950914 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4496) (512y:  238) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.200167e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.298361e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.298361e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.779046e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.874544e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.874544e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     2.297641 sec
+TOTAL       :     2.439248 sec
 INFO: No Floating Point Exceptions have been reported
-     4,253,384,705      cycles                           #    1.849 GHz                    
-     6,407,420,579      instructions                     #    1.51  insn per cycle         
-       2.301529661 seconds time elapsed
+     4,266,577,740      cycles                           #    1.747 GHz                    
+     6,406,035,694      instructions                     #    1.50  insn per cycle         
+       2.443561271 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2013) (512y:  163) (512z: 3730)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inlL_hrd0.txt
new file mode 100644
index 0000000000..325faa3fb7
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 27s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
+DATE: 2024-08-30_00:42:28
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inlL_hrd0/check_cuda.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 3.773393e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.060960e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.083346e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     0.497081 sec
+INFO: No Floating Point Exceptions have been reported
+     2,042,067,359      cycles                           #    2.868 GHz                    
+     2,990,476,150      instructions                     #    1.46  insn per cycle         
+       0.770626952 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inlL_hrd0/check_cuda.exe -p 64 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 196
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inlL_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 4.298515e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.611843e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.627935e+06                 )  sec^-1
+MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
+TOTAL       :     0.738853 sec
+INFO: No Floating Point Exceptions have been reported
+     2,810,218,660      cycles                           #    2.874 GHz                    
+     4,705,531,330      instructions                     #    1.67  insn per cycle         
+       1.036755072 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 1.413122e+00
+Avg ME (F77/GPU)   = 1.4131213755569485
+Relative difference = 4.4188898869949646e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 2.347642e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.359176e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.359176e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     7.000115 sec
+INFO: No Floating Point Exceptions have been reported
+    20,510,544,192      cycles                           #    2.929 GHz                    
+    61,870,437,889      instructions                     #    3.02  insn per cycle         
+       7.004379599 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  594) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413122e+00
+Avg ME (F77/C++)    = 1.4131213859069593
+Relative difference = 4.345647726386255e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 4.527496e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.569449e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.569449e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     3.639059 sec
+INFO: No Floating Point Exceptions have been reported
+    10,601,569,695      cycles                           #    2.910 GHz                    
+    31,391,808,142      instructions                     #    2.96  insn per cycle         
+       3.643366063 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1944) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413122e+00
+Avg ME (F77/C++)    = 1.4131213792564823
+Relative difference = 4.392710025734405e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 8.927714e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.088964e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.088964e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     1.855604 sec
+INFO: No Floating Point Exceptions have been reported
+     5,057,066,493      cycles                           #    2.720 GHz                    
+    11,453,132,967      instructions                     #    2.26  insn per cycle         
+       1.859900990 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2549) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413122e+00
+Avg ME (F77/C++)    = 1.4131213600217192
+Relative difference = 4.5288254008796884e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.019456e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.040353e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.040353e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     1.627488 sec
+INFO: No Floating Point Exceptions have been reported
+     4,453,621,248      cycles                           #    2.731 GHz                    
+    10,750,899,602      instructions                     #    2.41  insn per cycle         
+       1.631845129 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2348) (512y:   82) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413122e+00
+Avg ME (F77/C++)    = 1.4131213600217192
+Relative difference = 4.5288254008796884e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 6.837648e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.934852e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.934852e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
+TOTAL       :     2.418837 sec
+INFO: No Floating Point Exceptions have been reported
+     4,266,458,221      cycles                           #    1.761 GHz                    
+     6,278,608,618      instructions                     #    1.47  insn per cycle         
+       2.423156539 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2080) (512y:  117) (512z: 1593)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.413122e+00
+Avg ME (F77/C++)    = 1.4131213786174055
+Relative difference = 4.3972324717191576e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
index ab0ea6da4a..ac422c575d 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 15s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_19:55:25
+DATE: 2024-08-29_22:53:09
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.488153e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.514881e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.516998e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.484618e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.512495e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.514715e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.525204 sec
+TOTAL       :     0.529842 sec
 INFO: No Floating Point Exceptions have been reported
-     2,218,473,016      cycles                           #    2.933 GHz                    
-     3,463,122,045      instructions                     #    1.56  insn per cycle         
-       0.815780769 seconds time elapsed
+     2,204,935,727      cycles                           #    2.851 GHz                    
+     3,383,078,845      instructions                     #    1.53  insn per cycle         
+       0.834801026 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.132223e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.161610e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.162761e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.112342e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.144521e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.145898e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.033638 sec
+TOTAL       :     3.050717 sec
 INFO: No Floating Point Exceptions have been reported
-     9,809,726,664      cycles                           #    2.987 GHz                    
-    20,834,555,403      instructions                     #    2.12  insn per cycle         
-       3.343721812 seconds time elapsed
+     9,614,749,166      cycles                           #    2.908 GHz                    
+    22,078,344,118      instructions                     #    2.30  insn per cycle         
+       3.370393570 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.933106e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.934097e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.934097e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.860935e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.861845e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.861845e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.490765 sec
+TOTAL       :     8.819009 sec
 INFO: No Floating Point Exceptions have been reported
-    25,657,464,355      cycles                           #    3.021 GHz                    
-    78,956,678,283      instructions                     #    3.08  insn per cycle         
-       8.494928864 seconds time elapsed
+    25,677,829,436      cycles                           #    2.911 GHz                    
+    78,956,070,121      instructions                     #    3.07  insn per cycle         
+       8.823278932 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.556899e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.560135e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.560135e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.514741e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.517882e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.517882e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.617381 sec
+TOTAL       :     4.672391 sec
 INFO: No Floating Point Exceptions have been reported
-    13,096,002,004      cycles                           #    2.834 GHz                    
-    39,560,686,282      instructions                     #    3.02  insn per cycle         
-       4.621306822 seconds time elapsed
+    13,098,570,246      cycles                           #    2.802 GHz                    
+    39,561,785,502      instructions                     #    3.02  insn per cycle         
+       4.676673381 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.312969e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.330861e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.330861e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.018608e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.034749e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.034749e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.979952 sec
+TOTAL       :     2.051675 sec
 INFO: No Floating Point Exceptions have been reported
-     5,592,710,730      cycles                           #    2.820 GHz                    
-    13,825,002,673      instructions                     #    2.47  insn per cycle         
-       1.983978333 seconds time elapsed
+     5,588,693,407      cycles                           #    2.720 GHz                    
+    13,823,467,867      instructions                     #    2.47  insn per cycle         
+       2.055974957 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11530) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.448686e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.470931e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.470931e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.045058e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.066544e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.066544e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.742543 sec
+TOTAL       :     1.819352 sec
 INFO: No Floating Point Exceptions have been reported
-     4,950,283,084      cycles                           #    2.836 GHz                    
-    12,507,380,266      instructions                     #    2.53  insn per cycle         
-       1.746261350 seconds time elapsed
+     4,944,679,303      cycles                           #    2.713 GHz                    
+    12,505,400,088      instructions                     #    2.53  insn per cycle         
+       1.823660626 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10449) (512y:   88) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.208746e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.222007e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.222007e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.925584e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.938157e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.938157e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.282175 sec
+TOTAL       :     2.374676 sec
 INFO: No Floating Point Exceptions have been reported
-     4,146,883,314      cycles                           #    1.815 GHz                    
-     6,393,760,552      instructions                     #    1.54  insn per cycle         
-       2.285979679 seconds time elapsed
+     4,146,979,457      cycles                           #    1.744 GHz                    
+     6,391,143,672      instructions                     #    1.54  insn per cycle         
+       2.379203599 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1974) (512y:  102) (512z: 9391)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt
index 9aa087c04f..623974ef1b 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 15s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:19:34
+DATE: 2024-08-29_23:30:10
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.112227e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.443687e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.443687e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.098336e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.430669e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.430669e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.518381 sec
+TOTAL       :     0.516355 sec
 INFO: No Floating Point Exceptions have been reported
-     2,176,799,915      cycles                           #    2.911 GHz                    
-     3,495,470,615      instructions                     #    1.61  insn per cycle         
-       0.808139854 seconds time elapsed
+     2,145,035,124      cycles                           #    2.878 GHz                    
+     3,433,359,795      instructions                     #    1.60  insn per cycle         
+       0.805072037 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -79,15 +83,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.648774e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.128576e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.128576e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.640626e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.130604e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.130604e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.310822 sec
+TOTAL       :     3.319603 sec
 INFO: No Floating Point Exceptions have been reported
-    10,679,469,031      cycles                           #    2.985 GHz                    
-    23,830,814,413      instructions                     #    2.23  insn per cycle         
-       3.633830469 seconds time elapsed
+    10,448,466,366      cycles                           #    2.908 GHz                    
+    23,059,789,642      instructions                     #    2.21  insn per cycle         
+       3.652051020 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -110,15 +114,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.923317e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.924229e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.924229e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.870734e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.871640e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.871640e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.538018 sec
+TOTAL       :     8.777660 sec
 INFO: No Floating Point Exceptions have been reported
-    25,699,355,856      cycles                           #    3.009 GHz                    
-    78,962,606,878      instructions                     #    3.07  insn per cycle         
-       8.542523167 seconds time elapsed
+    25,686,292,856      cycles                           #    2.925 GHz                    
+    78,962,903,501      instructions                     #    3.07  insn per cycle         
+       8.782134787 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.605150e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.608587e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.608587e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.525474e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.528850e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.528850e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.559554 sec
+TOTAL       :     4.662189 sec
 INFO: No Floating Point Exceptions have been reported
-    13,117,342,563      cycles                           #    2.875 GHz                    
-    39,574,473,831      instructions                     #    3.02  insn per cycle         
-       4.563915289 seconds time elapsed
+    13,113,811,865      cycles                           #    2.811 GHz                    
+    39,572,448,827      instructions                     #    3.02  insn per cycle         
+       4.666654670 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -170,15 +174,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.187581e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.204828e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.204828e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.019644e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.036695e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.036695e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.014036 sec
+TOTAL       :     2.055512 sec
 INFO: No Floating Point Exceptions have been reported
-     5,605,896,422      cycles                           #    2.779 GHz                    
-    13,833,979,214      instructions                     #    2.47  insn per cycle         
-       2.018562637 seconds time elapsed
+     5,607,614,115      cycles                           #    2.723 GHz                    
+    13,835,145,765      instructions                     #    2.47  insn per cycle         
+       2.060008383 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11530) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.243444e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.265975e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.265975e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.095889e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.118002e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.118002e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.784658 sec
+TOTAL       :     1.813580 sec
 INFO: No Floating Point Exceptions have been reported
-     4,964,309,016      cycles                           #    2.776 GHz                    
-    12,516,237,329      instructions                     #    2.52  insn per cycle         
-       1.788990266 seconds time elapsed
+     4,957,865,514      cycles                           #    2.728 GHz                    
+    12,515,643,969      instructions                     #    2.52  insn per cycle         
+       1.818064103 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10449) (512y:   88) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -230,15 +234,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.077629e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.090790e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.090790e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.877189e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.889576e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.889576e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.328055 sec
+TOTAL       :     2.395674 sec
 INFO: No Floating Point Exceptions have been reported
-     4,162,316,275      cycles                           #    1.785 GHz                    
-     6,401,996,872      instructions                     #    1.54  insn per cycle         
-       2.332653341 seconds time elapsed
+     4,163,517,757      cycles                           #    1.735 GHz                    
+     6,402,780,090      instructions                     #    1.54  insn per cycle         
+       2.400249588 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1974) (512y:  102) (512z: 9391)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt
index ff7f772058..b0e7b2010b 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 01s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:29:47
+DATE: 2024-08-29_23:40:33
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.507693e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.534445e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.536631e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.439093e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.464818e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.467330e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     0.514407 sec
+TOTAL       :     0.513287 sec
 INFO: No Floating Point Exceptions have been reported
-     2,174,406,271      cycles                           #    2.930 GHz                    
-     3,461,893,969      instructions                     #    1.59  insn per cycle         
-       0.803766234 seconds time elapsed
+     2,137,026,227      cycles                           #    2.862 GHz                    
+     3,337,808,086      instructions                     #    1.56  insn per cycle         
+       0.808256494 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.147428e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.177075e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.178326e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.146669e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.179083e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.180475e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.252232e+02 +- 1.234346e+02 )  GeV^-4
-TOTAL       :     3.120976 sec
+TOTAL       :     3.125773 sec
 INFO: No Floating Point Exceptions have been reported
-    10,019,214,394      cycles                           #    2.972 GHz                    
-    21,025,350,474      instructions                     #    2.10  insn per cycle         
-       3.430265997 seconds time elapsed
+     9,801,640,150      cycles                           #    2.901 GHz                    
+    22,336,783,397      instructions                     #    2.28  insn per cycle         
+       3.434371661 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.913744e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.914711e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.914711e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.869360e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.870267e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.870267e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     8.577743 sec
+TOTAL       :     8.781002 sec
 INFO: No Floating Point Exceptions have been reported
-    25,670,651,990      cycles                           #    2.992 GHz                    
-    78,955,406,875      instructions                     #    3.08  insn per cycle         
-       8.581763598 seconds time elapsed
+    25,678,377,688      cycles                           #    2.923 GHz                    
+    78,955,720,202      instructions                     #    3.07  insn per cycle         
+       8.785102867 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.605176e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.608431e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.608431e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.506291e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.509424e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.509424e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     4.556655 sec
+TOTAL       :     4.685167 sec
 INFO: No Floating Point Exceptions have been reported
-    13,109,013,329      cycles                           #    2.875 GHz                    
-    39,558,662,551      instructions                     #    3.02  insn per cycle         
-       4.560750410 seconds time elapsed
+    13,112,847,720      cycles                           #    2.797 GHz                    
+    39,561,253,407      instructions                     #    3.02  insn per cycle         
+       4.689284646 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.281071e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.297965e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.297965e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.042071e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.058349e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.058349e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     1.988611 sec
+TOTAL       :     2.047508 sec
 INFO: No Floating Point Exceptions have been reported
-     5,595,768,969      cycles                           #    2.809 GHz                    
-    13,822,292,745      instructions                     #    2.47  insn per cycle         
-       1.992702302 seconds time elapsed
+     5,595,348,153      cycles                           #    2.728 GHz                    
+    13,822,442,672      instructions                     #    2.47  insn per cycle         
+       2.051861141 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11530) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.896901e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.917572e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.917572e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.026384e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.047273e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.047273e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     1.851324 sec
+TOTAL       :     1.824976 sec
 INFO: No Floating Point Exceptions have been reported
-     4,949,173,347      cycles                           #    2.669 GHz                    
-    12,503,287,563      instructions                     #    2.53  insn per cycle         
-       1.855415164 seconds time elapsed
+     4,951,647,688      cycles                           #    2.708 GHz                    
+    12,503,474,172      instructions                     #    2.53  insn per cycle         
+       1.829249158 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10449) (512y:   88) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.307417e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.320405e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.320405e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.899169e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.911871e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.911871e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     2.252212 sec
+TOTAL       :     2.385278 sec
 INFO: No Floating Point Exceptions have been reported
-     4,148,121,362      cycles                           #    1.839 GHz                    
-     6,388,958,727      instructions                     #    1.54  insn per cycle         
-       2.256422988 seconds time elapsed
+     4,153,864,303      cycles                           #    1.739 GHz                    
+     6,389,239,536      instructions                     #    1.54  insn per cycle         
+       2.389608459 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1974) (512y:  102) (512z: 9391)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt
index 8c55b22907..f54ab4104d 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 00s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:26:59
+DATE: 2024-08-29_23:37:42
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.458961e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.485253e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.488049e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.452375e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.478341e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.480594e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.514464 sec
+TOTAL       :     0.510857 sec
 INFO: No Floating Point Exceptions have been reported
-     2,130,639,833      cycles                           #    2.860 GHz                    
-     3,343,542,179      instructions                     #    1.57  insn per cycle         
-       0.805221680 seconds time elapsed
+     2,125,596,224      cycles                           #    2.863 GHz                    
+     3,340,543,841      instructions                     #    1.57  insn per cycle         
+       0.802256972 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.127051e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.156110e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.157363e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.146533e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.179036e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.180439e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.075386 sec
+TOTAL       :     3.069204 sec
 INFO: No Floating Point Exceptions have been reported
-     9,595,195,883      cycles                           #    2.879 GHz                    
-    21,169,008,885      instructions                     #    2.21  insn per cycle         
-       3.388723748 seconds time elapsed
+     9,615,019,528      cycles                           #    2.895 GHz                    
+    20,428,439,311      instructions                     #    2.12  insn per cycle         
+       3.379551964 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.853624e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.854505e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.854505e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.851531e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.852432e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.852432e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.854273 sec
+TOTAL       :     8.863900 sec
 INFO: No Floating Point Exceptions have been reported
-    25,673,092,183      cycles                           #    2.899 GHz                    
-    78,956,489,516      instructions                     #    3.08  insn per cycle         
-       8.858619563 seconds time elapsed
+    25,674,607,651      cycles                           #    2.896 GHz                    
+    78,957,122,456      instructions                     #    3.08  insn per cycle         
+       8.868072639 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.555877e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.559175e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.559175e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.525536e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.528790e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.528790e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.618236 sec
+TOTAL       :     4.658062 sec
 INFO: No Floating Point Exceptions have been reported
-    13,105,607,424      cycles                           #    2.836 GHz                    
-    39,562,262,758      instructions                     #    3.02  insn per cycle         
-       4.622614183 seconds time elapsed
+    13,094,220,368      cycles                           #    2.809 GHz                    
+    39,559,236,862      instructions                     #    3.02  insn per cycle         
+       4.662388664 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.117944e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.134423e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.134423e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.024421e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.040536e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.040536e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.026386 sec
+TOTAL       :     2.050098 sec
 INFO: No Floating Point Exceptions have been reported
-     5,589,116,983      cycles                           #    2.754 GHz                    
-    13,823,429,494      instructions                     #    2.47  insn per cycle         
-       2.030436364 seconds time elapsed
+     5,590,810,484      cycles                           #    2.723 GHz                    
+    13,823,415,124      instructions                     #    2.47  insn per cycle         
+       2.054333314 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11530) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.385930e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.407557e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.407557e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.001072e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.021748e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.021748e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.753538 sec
+TOTAL       :     1.828463 sec
 INFO: No Floating Point Exceptions have been reported
-     4,940,731,112      cycles                           #    2.812 GHz                    
-    12,505,003,217      instructions                     #    2.53  insn per cycle         
-       1.757654269 seconds time elapsed
+     4,944,586,506      cycles                           #    2.700 GHz                    
+    12,505,236,539      instructions                     #    2.53  insn per cycle         
+       1.832828782 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10449) (512y:   88) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.329600e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.342625e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.342625e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.916008e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.928185e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.928185e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.243900 sec
+TOTAL       :     2.377965 sec
 INFO: No Floating Point Exceptions have been reported
-     4,145,687,524      cycles                           #    1.845 GHz                    
-     6,390,893,367      instructions                     #    1.54  insn per cycle         
-       2.248144727 seconds time elapsed
+     4,144,212,852      cycles                           #    1.740 GHz                    
+     6,391,211,532      instructions                     #    1.54  insn per cycle         
+       2.382275809 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1974) (512y:  102) (512z: 9391)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt
index 28e1d95034..120d5447ee 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 01s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:24:16
+DATE: 2024-08-29_23:34:57
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -50,15 +54,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.229613e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.520921e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.523094e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.168442e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.482839e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.485319e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.513514 sec
+TOTAL       :     0.513830 sec
 INFO: No Floating Point Exceptions have been reported
-     2,168,346,936      cycles                           #    2.927 GHz                    
-     3,433,459,385      instructions                     #    1.58  insn per cycle         
-       0.802152079 seconds time elapsed
+     2,147,728,669      cycles                           #    2.867 GHz                    
+     3,437,039,893      instructions                     #    1.60  insn per cycle         
+       0.808379384 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
@@ -70,15 +74,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.733483e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.157890e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.159150e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.728769e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.181058e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.182447e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.199522 sec
+TOTAL       :     3.209844 sec
 INFO: No Floating Point Exceptions have been reported
-    10,294,194,017      cycles                           #    2.982 GHz                    
-    21,521,466,269      instructions                     #    2.09  insn per cycle         
-       3.508277099 seconds time elapsed
+     9,982,588,155      cycles                           #    2.885 GHz                    
+    22,074,605,131      instructions                     #    2.21  insn per cycle         
+       3.519251437 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -100,15 +104,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.923954e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.924900e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.924900e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.870620e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.871515e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.871515e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.530428 sec
+TOTAL       :     8.773542 sec
 INFO: No Floating Point Exceptions have been reported
-    25,661,796,778      cycles                           #    3.007 GHz                    
-    78,954,509,974      instructions                     #    3.08  insn per cycle         
-       8.534417643 seconds time elapsed
+    25,656,425,077      cycles                           #    2.923 GHz                    
+    78,955,689,357      instructions                     #    3.08  insn per cycle         
+       8.777616977 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -129,15 +133,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.615782e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.619130e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.619130e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.499221e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.502418e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.502418e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.541944 sec
+TOTAL       :     4.692707 sec
 INFO: No Floating Point Exceptions have been reported
-    13,126,189,517      cycles                           #    2.888 GHz                    
-    39,559,744,202      instructions                     #    3.01  insn per cycle         
-       4.546027002 seconds time elapsed
+    13,103,874,690      cycles                           #    2.791 GHz                    
+    39,562,042,319      instructions                     #    3.02  insn per cycle         
+       4.696908780 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -158,15 +162,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.299850e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.317113e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.317113e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.035218e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.052032e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.052032e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.982404 sec
+TOTAL       :     2.047329 sec
 INFO: No Floating Point Exceptions have been reported
-     5,586,639,772      cycles                           #    2.813 GHz                    
-    13,823,166,385      instructions                     #    2.47  insn per cycle         
-       1.986590396 seconds time elapsed
+     5,586,651,910      cycles                           #    2.724 GHz                    
+    13,823,431,614      instructions                     #    2.47  insn per cycle         
+       2.051685897 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11530) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -187,15 +191,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.384353e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.406906e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.406906e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.106245e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.127457e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.127457e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.753945 sec
+TOTAL       :     1.807636 sec
 INFO: No Floating Point Exceptions have been reported
-     4,942,572,018      cycles                           #    2.813 GHz                    
-    12,504,933,165      instructions                     #    2.53  insn per cycle         
-       1.758084275 seconds time elapsed
+     4,945,546,431      cycles                           #    2.731 GHz                    
+    12,506,428,636      instructions                     #    2.53  insn per cycle         
+       1.811855898 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10449) (512y:   88) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -216,15 +220,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.317460e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.330821e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.330821e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.912294e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.924724e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.924724e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.247518 sec
+TOTAL       :     2.378992 sec
 INFO: No Floating Point Exceptions have been reported
-     4,146,774,770      cycles                           #    1.843 GHz                    
-     6,391,452,350      instructions                     #    1.54  insn per cycle         
-       2.251569316 seconds time elapsed
+     4,143,588,187      cycles                           #    1.739 GHz                    
+     6,391,287,965      instructions                     #    1.54  insn per cycle         
+       2.383250915 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1974) (512y:  102) (512z: 9391)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt
index ef490ee27f..4c1e452219 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 53s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_19:55:57
+DATE: 2024-08-29_22:53:42
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.468386e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.495424e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.497730e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.474633e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.502711e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.505044e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.528153 sec
+TOTAL       :     0.525987 sec
 INFO: No Floating Point Exceptions have been reported
-     2,223,041,093      cycles                           #    2.885 GHz                    
-     3,357,279,580      instructions                     #    1.51  insn per cycle         
-       0.829273079 seconds time elapsed
+     2,173,141,636      cycles                           #    2.870 GHz                    
+     3,442,650,944      instructions                     #    1.58  insn per cycle         
+       0.816470213 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.133736e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.163273e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.164433e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.142376e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.175014e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.176392e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.026404 sec
+TOTAL       :     3.027490 sec
 INFO: No Floating Point Exceptions have been reported
-     9,787,087,404      cycles                           #    2.984 GHz                    
-    20,868,236,699      instructions                     #    2.13  insn per cycle         
-       3.335921488 seconds time elapsed
+     9,517,475,939      cycles                           #    2.901 GHz                    
+    19,988,736,260      instructions                     #    2.10  insn per cycle         
+       3.340146090 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.930451e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.931397e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.931397e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.876866e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.877757e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.877757e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.501967 sec
+TOTAL       :     8.744259 sec
 INFO: No Floating Point Exceptions have been reported
-    25,635,869,243      cycles                           #    3.014 GHz                    
-    78,699,985,409      instructions                     #    3.07  insn per cycle         
-       8.506017009 seconds time elapsed
+    25,630,018,477      cycles                           #    2.931 GHz                    
+    78,700,967,648      instructions                     #    3.07  insn per cycle         
+       8.748458419 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 4192) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.635004e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.638325e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.638325e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.529280e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.532488e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.532488e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.518323 sec
+TOTAL       :     4.653045 sec
 INFO: No Floating Point Exceptions have been reported
-    13,043,304,130      cycles                           #    2.885 GHz                    
-    39,451,387,281      instructions                     #    3.02  insn per cycle         
-       4.522544486 seconds time elapsed
+    13,049,773,576      cycles                           #    2.803 GHz                    
+    39,452,267,607      instructions                     #    3.02  insn per cycle         
+       4.657264479 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:12973) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.103214e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.119837e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.119837e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.952727e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.968616e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.968616e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.030819 sec
+TOTAL       :     2.068541 sec
 INFO: No Floating Point Exceptions have been reported
-     5,706,370,481      cycles                           #    2.806 GHz                    
-    13,911,650,507      instructions                     #    2.44  insn per cycle         
-       2.034636014 seconds time elapsed
+     5,654,997,353      cycles                           #    2.729 GHz                    
+    13,909,812,029      instructions                     #    2.46  insn per cycle         
+       2.072950411 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11592) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.209342e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.231718e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.231718e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.933565e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.954089e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.954089e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.787809 sec
+TOTAL       :     1.842015 sec
 INFO: No Floating Point Exceptions have been reported
-     4,991,279,132      cycles                           #    2.786 GHz                    
-    12,604,125,286      instructions                     #    2.53  insn per cycle         
-       1.792337833 seconds time elapsed
+     4,995,833,498      cycles                           #    2.707 GHz                    
+    12,603,386,160      instructions                     #    2.52  insn per cycle         
+       1.846187939 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10433) (512y:  240) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.276351e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.289893e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.289893e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.928234e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.940575e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.940575e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.260957 sec
+TOTAL       :     2.373698 sec
 INFO: No Floating Point Exceptions have been reported
-     4,149,253,590      cycles                           #    1.833 GHz                    
-     6,500,352,718      instructions                     #    1.57  insn per cycle         
-       2.264815173 seconds time elapsed
+     4,151,141,696      cycles                           #    1.746 GHz                    
+     6,499,236,974      instructions                     #    1.57  insn per cycle         
+       2.378031241 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1750) (512y:  194) (512z: 9387)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt
index bbaea3caef..7af0451fcc 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 02m 33s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:10:19
+DATE: 2024-08-29_23:20:39
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.246678e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.268467e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.270191e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.254301e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.279795e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.282561e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.534085 sec
+TOTAL       :     0.538286 sec
 INFO: No Floating Point Exceptions have been reported
-     2,285,518,624      cycles                           #    2.953 GHz                    
-     3,580,561,444      instructions                     #    1.57  insn per cycle         
-       0.832119310 seconds time elapsed
+     2,191,082,086      cycles                           #    2.854 GHz                    
+     3,355,829,429      instructions                     #    1.53  insn per cycle         
+       0.826481038 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.761384e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.784291e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.785252e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.754882e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.783344e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.784543e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.301764 sec
+TOTAL       :     3.311612 sec
 INFO: No Floating Point Exceptions have been reported
-    10,582,525,253      cycles                           #    2.981 GHz                    
-    22,709,986,647      instructions                     #    2.15  insn per cycle         
-       3.609006709 seconds time elapsed
+    10,400,680,075      cycles                           #    2.916 GHz                    
+    23,997,882,975      instructions                     #    2.31  insn per cycle         
+       3.622738301 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.342825e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.343311e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.343311e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.237636e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.238116e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.238116e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :    37.771526 sec
+TOTAL       :    38.708076 sec
 INFO: No Floating Point Exceptions have been reported
-   112,991,669,428      cycles                           #    2.992 GHz                    
-   144,862,430,473      instructions                     #    1.28  insn per cycle         
-      37.775737563 seconds time elapsed
+   113,002,307,863      cycles                           #    2.919 GHz                    
+   144,865,531,987      instructions                     #    1.28  insn per cycle         
+      38.712493320 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:21361) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.180115e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.182680e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.182680e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.074049e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.076486e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.076486e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     5.162984 sec
+TOTAL       :     5.341332 sec
 INFO: No Floating Point Exceptions have been reported
-    14,747,517,010      cycles                           #    2.855 GHz                    
-    37,650,782,777      instructions                     #    2.55  insn per cycle         
-       5.167050022 seconds time elapsed
+    14,754,166,844      cycles                           #    2.761 GHz                    
+    37,648,707,475      instructions                     #    2.55  insn per cycle         
+       5.345720009 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:68253) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.587961e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.601478e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.601478e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.336087e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.349641e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.349641e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.167267 sec
+TOTAL       :     2.242124 sec
 INFO: No Floating Point Exceptions have been reported
-     6,123,933,660      cycles                           #    2.822 GHz                    
-    13,061,783,520      instructions                     #    2.13  insn per cycle         
-       2.171395105 seconds time elapsed
+     6,124,700,045      cycles                           #    2.727 GHz                    
+    13,060,839,816      instructions                     #    2.13  insn per cycle         
+       2.246622192 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:46965) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.164851e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.185111e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.185111e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.868994e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.889122e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.889122e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.795482 sec
+TOTAL       :     1.855494 sec
 INFO: No Floating Point Exceptions have been reported
-     5,057,846,668      cycles                           #    2.812 GHz                    
-    11,453,287,308      instructions                     #    2.26  insn per cycle         
-       1.799543537 seconds time elapsed
+     5,071,321,771      cycles                           #    2.728 GHz                    
+    11,453,092,735      instructions                     #    2.26  insn per cycle         
+       1.859840409 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:40490) (512y:  285) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.447733e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.461062e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.461062e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.280272e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.293880e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.293880e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.208265 sec
+TOTAL       :     2.259563 sec
 INFO: No Floating Point Exceptions have been reported
-     3,952,574,407      cycles                           #    1.787 GHz                    
-     5,928,010,897      instructions                     #    1.50  insn per cycle         
-       2.212410955 seconds time elapsed
+     3,952,850,673      cycles                           #    1.747 GHz                    
+     5,926,868,768      instructions                     #    1.50  insn per cycle         
+       2.264082563 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2432) (512y:  337) (512z:39348)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt
index 7583c01cf4..db961be493 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 02m 20s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:11:26
+DATE: 2024-08-29_23:21:47
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.275171e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.299147e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.301063e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.262031e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.287266e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.289567e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.533669 sec
+TOTAL       :     0.540687 sec
 INFO: No Floating Point Exceptions have been reported
-     2,269,961,618      cycles                           #    2.940 GHz                    
-     3,538,568,106      instructions                     #    1.56  insn per cycle         
-       0.830876846 seconds time elapsed
+     2,167,162,043      cycles                           #    2.831 GHz                    
+     3,412,749,114      instructions                     #    1.57  insn per cycle         
+       0.827269874 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.755572e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.778494e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.779486e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.754345e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.782845e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.783987e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.298195 sec
+TOTAL       :     3.305990 sec
 INFO: No Floating Point Exceptions have been reported
-    10,673,699,971      cycles                           #    3.000 GHz                    
-    24,748,682,176      instructions                     #    2.32  insn per cycle         
-       3.615699896 seconds time elapsed
+    10,339,047,103      cycles                           #    2.904 GHz                    
+    21,814,042,211      instructions                     #    2.11  insn per cycle         
+       3.618232079 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.321186e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.321644e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.321644e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.213809e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.214258e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.214258e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :    37.957787 sec
+TOTAL       :    38.927029 sec
 INFO: No Floating Point Exceptions have been reported
-   113,686,913,957      cycles                           #    2.995 GHz                    
-   144,259,453,305      instructions                     #    1.27  insn per cycle         
-      37.961860960 seconds time elapsed
+   113,599,990,236      cycles                           #    2.918 GHz                    
+   144,263,471,206      instructions                     #    1.27  insn per cycle         
+      38.931468699 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:20934) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.073725e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.076096e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.076096e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.980868e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.983156e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.983156e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     5.341043 sec
+TOTAL       :     5.507958 sec
 INFO: No Floating Point Exceptions have been reported
-    15,271,797,585      cycles                           #    2.858 GHz                    
-    38,390,165,623      instructions                     #    2.51  insn per cycle         
-       5.345237036 seconds time elapsed
+    15,296,337,393      cycles                           #    2.775 GHz                    
+    38,390,334,283      instructions                     #    2.51  insn per cycle         
+       5.512252272 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:69643) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.624786e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.638797e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.638797e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.448639e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.462484e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.462484e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.157053 sec
+TOTAL       :     2.208158 sec
 INFO: No Floating Point Exceptions have been reported
-     6,008,150,983      cycles                           #    2.781 GHz                    
-    12,934,571,742      instructions                     #    2.15  insn per cycle         
-       2.161176604 seconds time elapsed
+     6,009,609,297      cycles                           #    2.717 GHz                    
+    12,934,869,074      instructions                     #    2.15  insn per cycle         
+       2.212574107 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:46091) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.062477e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.083007e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.083007e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.589282e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.608300e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.608300e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.815728 sec
+TOTAL       :     1.915769 sec
 INFO: No Floating Point Exceptions have been reported
-     5,090,244,384      cycles                           #    2.798 GHz                    
-    11,449,331,673      instructions                     #    2.25  insn per cycle         
-       1.819810741 seconds time elapsed
+     5,100,469,209      cycles                           #    2.658 GHz                    
+    11,450,493,955      instructions                     #    2.24  insn per cycle         
+       1.920190216 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:40134) (512y:  219) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.561516e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.575406e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.575406e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.250893e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.264325e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.264325e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.175028 sec
+TOTAL       :     2.268313 sec
 INFO: No Floating Point Exceptions have been reported
-     3,947,332,966      cycles                           #    1.812 GHz                    
-     5,889,708,142      instructions                     #    1.49  insn per cycle         
-       2.179231650 seconds time elapsed
+     3,950,507,577      cycles                           #    1.739 GHz                    
+     5,890,173,466      instructions                     #    1.49  insn per cycle         
+       2.272639544 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1947) (512y:  259) (512z:38926)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inlL_hrd0.txt
new file mode 100644
index 0000000000..9599cb6934
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 47s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+DATE: 2024-08-30_00:42:54
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 2.911214e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.931216e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.932858e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     0.541005 sec
+INFO: No Floating Point Exceptions have been reported
+     2,194,844,017      cycles                           #    2.865 GHz                    
+     3,368,265,485      instructions                     #    1.53  insn per cycle         
+       0.823509438 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 64 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 3.311779e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.332604e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.333493e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
+TOTAL       :     3.497346 sec
+INFO: No Floating Point Exceptions have been reported
+    10,907,775,905      cycles                           #    2.913 GHz                    
+    23,821,302,001      instructions                     #    2.18  insn per cycle         
+       3.804349930 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 6.626675e-04
+Avg ME (F77/GPU)   = 6.6266731198158122E-004
+Relative difference = 2.837296513854949e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.791241e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.792070e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.792070e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     9.162205 sec
+INFO: No Floating Point Exceptions have been reported
+    26,854,044,241      cycles                           #    2.930 GHz                    
+    82,185,691,113      instructions                     #    3.06  insn per cycle         
+       9.166490735 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 4238) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.626675e-04
+Avg ME (F77/C++)    = 6.6266731198141144E-004
+Relative difference = 2.837299076015613e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.380720e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.383647e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.383647e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     4.857349 sec
+INFO: No Floating Point Exceptions have been reported
+    13,599,772,498      cycles                           #    2.798 GHz                    
+    40,494,665,742      instructions                     #    2.98  insn per cycle         
+       4.861712637 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 8237) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.626675e-04
+Avg ME (F77/C++)    = 6.6266731198141133E-004
+Relative difference = 2.8372990776517314e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 7.693790e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.708910e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.708910e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     2.137987 sec
+INFO: No Floating Point Exceptions have been reported
+     5,780,953,566      cycles                           #    2.700 GHz                    
+    14,170,611,904      instructions                     #    2.45  insn per cycle         
+       2.142400461 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8274) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.626675e-04
+Avg ME (F77/C++)    = 6.6266731198157320E-004
+Relative difference = 2.837296634927675e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 8.776664e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.795804e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.795804e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     1.875227 sec
+INFO: No Floating Point Exceptions have been reported
+     5,135,358,013      cycles                           #    2.733 GHz                    
+    13,031,799,327      instructions                     #    2.54  insn per cycle         
+       1.879498964 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8105) (512y:   93) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.626675e-04
+Avg ME (F77/C++)    = 6.6266731198157320E-004
+Relative difference = 2.837296634927675e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 6.773731e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.785508e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.785508e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     2.427708 sec
+INFO: No Floating Point Exceptions have been reported
+     4,220,573,267      cycles                           #    1.736 GHz                    
+     6,654,250,658      instructions                     #    1.58  insn per cycle         
+       2.431985711 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2094) (512y:  107) (512z: 6939)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.626675e-04
+Avg ME (F77/C++)    = 6.6266731198157320E-004
+Relative difference = 2.837296634927675e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
index 52d8759019..caf859f330 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 58s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_19:56:30
+DATE: 2024-08-29_22:54:16
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.984596e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.027561e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.032406e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.974829e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.019268e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.026437e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059596e+00 +- 2.368053e+00 )  GeV^-4
-TOTAL       :     0.485881 sec
+TOTAL       :     0.488083 sec
 INFO: No Floating Point Exceptions have been reported
-     2,058,871,536      cycles                           #    2.917 GHz                    
-     3,048,657,677      instructions                     #    1.48  insn per cycle         
-       0.765585250 seconds time elapsed
+     2,011,611,296      cycles                           #    2.862 GHz                    
+     3,016,167,250      instructions                     #    1.50  insn per cycle         
+       0.763948000 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.127584e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.186636e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.189605e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.210072e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.275111e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.278094e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
-TOTAL       :     1.790632 sec
+TOTAL       :     1.784736 sec
 INFO: No Floating Point Exceptions have been reported
-     5,978,175,900      cycles                           #    2.960 GHz                    
-    12,554,229,706      instructions                     #    2.10  insn per cycle         
-       2.078428019 seconds time elapsed
+     5,838,577,722      cycles                           #    2.898 GHz                    
+    12,186,585,442      instructions                     #    2.09  insn per cycle         
+       2.071487175 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.983107e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.984075e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.984075e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.926933e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.927895e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.927895e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
-TOTAL       :     8.275184 sec
+TOTAL       :     8.516057 sec
 INFO: No Floating Point Exceptions have been reported
-    24,981,677,575      cycles                           #    3.018 GHz                    
-    79,112,697,083      instructions                     #    3.17  insn per cycle         
-       8.279194518 seconds time elapsed
+    24,970,984,207      cycles                           #    2.931 GHz                    
+    79,109,755,994      instructions                     #    3.17  insn per cycle         
+       8.520078553 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.049042e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.062007e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.062007e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.961795e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.974179e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.974179e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060119e+00 +- 2.367901e+00 )  GeV^-4
-TOTAL       :     2.331496 sec
+TOTAL       :     2.360659 sec
 INFO: No Floating Point Exceptions have been reported
-     6,513,667,582      cycles                           #    2.790 GHz                    
-    20,270,685,743      instructions                     #    3.11  insn per cycle         
-       2.335321002 seconds time elapsed
+     6,513,112,442      cycles                           #    2.755 GHz                    
+    20,270,717,088      instructions                     #    3.11  insn per cycle         
+       2.364792746 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.631322e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.638001e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.638001e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.566079e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.572375e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.572375e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     1.010094 sec
+TOTAL       :     1.052140 sec
 INFO: No Floating Point Exceptions have been reported
-     2,858,902,160      cycles                           #    2.822 GHz                    
-     7,066,281,657      instructions                     #    2.47  insn per cycle         
-       1.013626411 seconds time elapsed
+     2,869,893,884      cycles                           #    2.719 GHz                    
+     7,065,821,933      instructions                     #    2.46  insn per cycle         
+       1.056261642 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12058) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.855078e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.863833e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.863833e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.792686e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.801200e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.801200e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     0.888854 sec
+TOTAL       :     0.919902 sec
 INFO: No Floating Point Exceptions have been reported
-     2,514,609,187      cycles                           #    2.820 GHz                    
-     6,403,227,199      instructions                     #    2.55  insn per cycle         
-       0.892442076 seconds time elapsed
+     2,517,840,272      cycles                           #    2.727 GHz                    
+     6,403,424,925      instructions                     #    2.54  insn per cycle         
+       0.923978611 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11026) (512y:   43) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.472481e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.477974e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.477974e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.398821e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.403770e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.403770e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
-TOTAL       :     1.118887 sec
+TOTAL       :     1.177484 sec
 INFO: No Floating Point Exceptions have been reported
-     2,071,045,676      cycles                           #    1.846 GHz                    
-     3,304,181,825      instructions                     #    1.60  insn per cycle         
-       1.122589043 seconds time elapsed
+     2,069,515,653      cycles                           #    1.752 GHz                    
+     3,303,689,754      instructions                     #    1.60  insn per cycle         
+       1.181725896 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2591) (512y:   46) (512z: 9609)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt
index d4f5540c08..4384aa126c 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 03s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:20:08
+DATE: 2024-08-29_23:30:44
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.362722e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.966550e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.966550e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.349206e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.974366e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.974366e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.048178e+00 +- 2.364571e+00 )  GeV^-4
-TOTAL       :     0.475517 sec
+TOTAL       :     0.473565 sec
 INFO: No Floating Point Exceptions have been reported
-     2,001,123,741      cycles                           #    2.916 GHz                    
-     3,014,989,818      instructions                     #    1.51  insn per cycle         
-       0.744972192 seconds time elapsed
+     1,958,980,880      cycles                           #    2.856 GHz                    
+     2,988,475,752      instructions                     #    1.53  insn per cycle         
+       0.743220272 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -79,15 +83,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.951093e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.086269e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.086269e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.926520e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.073479e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.073479e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.641709e+00 +- 4.994248e+00 )  GeV^-4
-TOTAL       :     1.963357 sec
+TOTAL       :     1.969743 sec
 INFO: No Floating Point Exceptions have been reported
-     6,464,131,212      cycles                           #    2.938 GHz                    
-    13,280,566,465      instructions                     #    2.05  insn per cycle         
-       2.255825453 seconds time elapsed
+     6,416,487,251      cycles                           #    2.894 GHz                    
+    13,765,769,658      instructions                     #    2.15  insn per cycle         
+       2.275672841 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -110,15 +114,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.961986e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.962995e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.962995e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.920489e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.921406e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.921406e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
-TOTAL       :     8.366737 sec
+TOTAL       :     8.546975 sec
 INFO: No Floating Point Exceptions have been reported
-    25,004,224,949      cycles                           #    2.987 GHz                    
-    79,113,889,000      instructions                     #    3.16  insn per cycle         
-       8.370993372 seconds time elapsed
+    24,967,095,829      cycles                           #    2.920 GHz                    
+    79,114,043,176      instructions                     #    3.17  insn per cycle         
+       8.551105311 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.168882e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.181926e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.181926e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.964392e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.977246e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.977246e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060119e+00 +- 2.367901e+00 )  GeV^-4
-TOTAL       :     2.295100 sec
+TOTAL       :     2.362536 sec
 INFO: No Floating Point Exceptions have been reported
-     6,522,736,001      cycles                           #    2.838 GHz                    
-    20,279,496,113      instructions                     #    3.11  insn per cycle         
-       2.299251518 seconds time elapsed
+     6,526,475,046      cycles                           #    2.759 GHz                    
+    20,281,153,306      instructions                     #    3.11  insn per cycle         
+       2.366796104 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -170,15 +174,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.604472e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.610985e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.610985e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.576931e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.583321e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.583321e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     1.029832 sec
+TOTAL       :     1.047656 sec
 INFO: No Floating Point Exceptions have been reported
-     2,869,187,737      cycles                           #    2.777 GHz                    
-     7,075,475,577      instructions                     #    2.47  insn per cycle         
-       1.033942723 seconds time elapsed
+     2,876,976,191      cycles                           #    2.737 GHz                    
+     7,076,241,584      instructions                     #    2.46  insn per cycle         
+       1.051948309 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12058) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.863942e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.872787e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.872787e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.775555e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.783879e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.783879e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     0.887626 sec
+TOTAL       :     0.931498 sec
 INFO: No Floating Point Exceptions have been reported
-     2,527,038,904      cycles                           #    2.836 GHz                    
-     6,413,204,152      instructions                     #    2.54  insn per cycle         
-       0.891739175 seconds time elapsed
+     2,525,182,211      cycles                           #    2.700 GHz                    
+     6,413,494,225      instructions                     #    2.54  insn per cycle         
+       0.936017248 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11026) (512y:   43) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -230,15 +234,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.473762e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.479361e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.479361e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.390162e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.395169e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.395169e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
-TOTAL       :     1.120677 sec
+TOTAL       :     1.187644 sec
 INFO: No Floating Point Exceptions have been reported
-     2,080,597,436      cycles                           #    1.851 GHz                    
-     3,313,716,206      instructions                     #    1.59  insn per cycle         
-       1.124889543 seconds time elapsed
+     2,081,030,338      cycles                           #    1.747 GHz                    
+     3,314,848,476      instructions                     #    1.59  insn per cycle         
+       1.191904774 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2591) (512y:   46) (512z: 9609)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt
index 2bbd6d0428..5f6b002bb3 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 00s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:30:20
+DATE: 2024-08-29_23:41:06
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.027396e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.072992e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.077839e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.965237e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.008721e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.013692e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.159396e-01 +- 3.238803e-01 )  GeV^-4
-TOTAL       :     0.472420 sec
+TOTAL       :     0.469895 sec
 INFO: No Floating Point Exceptions have been reported
-     2,017,335,926      cycles                           #    2.929 GHz                    
-     2,996,516,741      instructions                     #    1.49  insn per cycle         
-       0.747617629 seconds time elapsed
+     1,989,054,745      cycles                           #    2.864 GHz                    
+     3,002,672,709      instructions                     #    1.51  insn per cycle         
+       0.751303855 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.176066e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.236543e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.239377e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.126762e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.189675e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.192592e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.094367e+02 +- 1.071509e+02 )  GeV^-4
-TOTAL       :     1.869944 sec
+TOTAL       :     1.874744 sec
 INFO: No Floating Point Exceptions have been reported
-     6,204,679,090      cycles                           #    2.959 GHz                    
-    13,136,993,437      instructions                     #    2.12  insn per cycle         
-       2.155017166 seconds time elapsed
+     6,008,746,998      cycles                           #    2.857 GHz                    
+    12,059,694,785      instructions                     #    2.01  insn per cycle         
+       2.159599467 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.981113e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.982134e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.982134e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.922848e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.923819e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.923819e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.208459e-01 +- 3.253446e-01 )  GeV^-4
-TOTAL       :     8.283937 sec
+TOTAL       :     8.535682 sec
 INFO: No Floating Point Exceptions have been reported
-    24,969,353,482      cycles                           #    3.013 GHz                    
-    79,108,034,680      instructions                     #    3.17  insn per cycle         
-       8.287825380 seconds time elapsed
+    24,979,869,216      cycles                           #    2.925 GHz                    
+    79,110,524,140      instructions                     #    3.17  insn per cycle         
+       8.539754584 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.181056e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.194443e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.194443e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.973837e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.986611e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.986611e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.208457e-01 +- 3.253445e-01 )  GeV^-4
-TOTAL       :     2.289520 sec
+TOTAL       :     2.357528 sec
 INFO: No Floating Point Exceptions have been reported
-     6,518,141,305      cycles                           #    2.843 GHz                    
-    20,270,157,027      instructions                     #    3.11  insn per cycle         
-       2.293380252 seconds time elapsed
+     6,515,014,758      cycles                           #    2.761 GHz                    
+    20,269,142,798      instructions                     #    3.11  insn per cycle         
+       2.361661819 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.629677e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.636717e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.636717e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.574724e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.581587e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.581587e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.214978e-01 +- 3.255521e-01 )  GeV^-4
-TOTAL       :     1.012223 sec
+TOTAL       :     1.047415 sec
 INFO: No Floating Point Exceptions have been reported
-     2,864,292,228      cycles                           #    2.821 GHz                    
-     7,063,008,029      instructions                     #    2.47  insn per cycle         
-       1.016182729 seconds time elapsed
+     2,863,177,251      cycles                           #    2.724 GHz                    
+     7,063,220,596      instructions                     #    2.47  insn per cycle         
+       1.051770639 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12058) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.830887e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.839546e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.839546e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.791876e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.800569e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.800569e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.214978e-01 +- 3.255521e-01 )  GeV^-4
-TOTAL       :     0.901658 sec
+TOTAL       :     0.921846 sec
 INFO: No Floating Point Exceptions have been reported
-     2,522,018,356      cycles                           #    2.787 GHz                    
-     6,399,988,861      instructions                     #    2.54  insn per cycle         
-       0.905644388 seconds time elapsed
+     2,521,844,440      cycles                           #    2.725 GHz                    
+     6,401,695,337      instructions                     #    2.54  insn per cycle         
+       0.925999634 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11026) (512y:   43) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.485210e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.490986e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.490986e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.393441e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.398473e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.398473e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.214981e-01 +- 3.255523e-01 )  GeV^-4
-TOTAL       :     1.110909 sec
+TOTAL       :     1.183808 sec
 INFO: No Floating Point Exceptions have been reported
-     2,072,711,689      cycles                           #    1.860 GHz                    
-     3,301,709,135      instructions                     #    1.59  insn per cycle         
-       1.114884740 seconds time elapsed
+     2,075,399,533      cycles                           #    1.749 GHz                    
+     3,302,834,379      instructions                     #    1.59  insn per cycle         
+       1.187854862 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2591) (512y:   46) (512z: 9609)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt
index 687ea21e82..5a9fcaa215 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 01s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:27:32
+DATE: 2024-08-29_23:38:16
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.974387e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.019107e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.024136e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.005321e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.051255e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.056194e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059596e+00 +- 2.368053e+00 )  GeV^-4
-TOTAL       :     0.465731 sec
+TOTAL       :     0.469314 sec
 INFO: No Floating Point Exceptions have been reported
-     1,986,250,676      cycles                           #    2.933 GHz                    
-     2,951,574,048      instructions                     #    1.49  insn per cycle         
-       0.733704221 seconds time elapsed
+     1,943,160,703      cycles                           #    2.856 GHz                    
+     2,972,314,757      instructions                     #    1.53  insn per cycle         
+       0.737370242 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.127905e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.186845e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.189533e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.174738e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.237805e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.240772e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
-TOTAL       :     1.821509 sec
+TOTAL       :     1.820268 sec
 INFO: No Floating Point Exceptions have been reported
-     6,099,068,812      cycles                           #    2.975 GHz                    
-    13,255,673,376      instructions                     #    2.17  insn per cycle         
-       2.106639688 seconds time elapsed
+     5,892,605,459      cycles                           #    2.879 GHz                    
+    11,953,174,508      instructions                     #    2.03  insn per cycle         
+       2.103372158 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.982878e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.983848e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.983848e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.891389e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.892289e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.892289e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
-TOTAL       :     8.276232 sec
+TOTAL       :     8.675984 sec
 INFO: No Floating Point Exceptions have been reported
-    24,992,064,451      cycles                           #    3.019 GHz                    
-    79,108,890,354      instructions                     #    3.17  insn per cycle         
-       8.280274971 seconds time elapsed
+    24,971,528,629      cycles                           #    2.877 GHz                    
+    79,109,972,373      instructions                     #    3.17  insn per cycle         
+       8.679991949 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.180915e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.194829e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.194829e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.872830e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.885581e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.885581e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060119e+00 +- 2.367901e+00 )  GeV^-4
-TOTAL       :     2.288781 sec
+TOTAL       :     2.390976 sec
 INFO: No Floating Point Exceptions have been reported
-     6,519,434,997      cycles                           #    2.844 GHz                    
-    20,271,064,648      instructions                     #    3.11  insn per cycle         
-       2.292801258 seconds time elapsed
+     6,521,637,162      cycles                           #    2.724 GHz                    
+    20,271,182,025      instructions                     #    3.11  insn per cycle         
+       2.395096620 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.639199e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.645912e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.645912e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.530401e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.536759e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.536759e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     1.005317 sec
+TOTAL       :     1.076707 sec
 INFO: No Floating Point Exceptions have been reported
-     2,861,574,039      cycles                           #    2.837 GHz                    
-     7,065,482,922      instructions                     #    2.47  insn per cycle         
-       1.009367222 seconds time elapsed
+     2,865,147,176      cycles                           #    2.656 GHz                    
+     7,066,009,717      instructions                     #    2.47  insn per cycle         
+       1.083434992 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12058) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.841221e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.849583e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.849583e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.763488e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.771736e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.771736e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     0.895518 sec
+TOTAL       :     0.934798 sec
 INFO: No Floating Point Exceptions have been reported
-     2,517,844,676      cycles                           #    2.802 GHz                    
-     6,403,839,691      instructions                     #    2.54  insn per cycle         
-       0.899537508 seconds time elapsed
+     2,518,912,552      cycles                           #    2.685 GHz                    
+     6,403,623,689      instructions                     #    2.54  insn per cycle         
+       0.938856790 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11026) (512y:   43) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.455203e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.460404e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.460404e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.394629e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.399612e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.399612e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
-TOTAL       :     1.132212 sec
+TOTAL       :     1.181219 sec
 INFO: No Floating Point Exceptions have been reported
-     2,067,552,649      cycles                           #    1.821 GHz                    
-     3,303,460,015      instructions                     #    1.60  insn per cycle         
-       1.136266053 seconds time elapsed
+     2,069,401,084      cycles                           #    1.747 GHz                    
+     3,303,621,434      instructions                     #    1.60  insn per cycle         
+       1.185349026 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2591) (512y:   46) (512z: 9609)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt
index 5238dd29f1..4931fb2466 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 00s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:24:48
+DATE: 2024-08-29_23:35:30
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -50,15 +54,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.461156e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.032316e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.037418e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.452614e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.032417e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.037434e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.048178e+00 +- 2.364571e+00 )  GeV^-4
-TOTAL       :     0.471716 sec
+TOTAL       :     0.470442 sec
 INFO: No Floating Point Exceptions have been reported
-     2,015,572,444      cycles                           #    2.959 GHz                    
-     3,048,101,818      instructions                     #    1.51  insn per cycle         
-       0.739787706 seconds time elapsed
+     1,990,742,039      cycles                           #    2.864 GHz                    
+     2,996,109,900      instructions                     #    1.51  insn per cycle         
+       0.751854703 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
@@ -70,15 +74,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.217590e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.274346e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.276990e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.165289e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.245358e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.248369e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.641709e+00 +- 4.994248e+00 )  GeV^-4
-TOTAL       :     1.888870 sec
+TOTAL       :     1.898543 sec
 INFO: No Floating Point Exceptions have been reported
-     6,296,963,935      cycles                           #    2.979 GHz                    
-    13,479,190,689      instructions                     #    2.14  insn per cycle         
-       2.172551421 seconds time elapsed
+     6,178,614,639      cycles                           #    2.905 GHz                    
+    13,438,267,033      instructions                     #    2.17  insn per cycle         
+       2.185826437 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -100,15 +104,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.967176e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.968130e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.968130e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.928370e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.929306e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.929306e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
-TOTAL       :     8.342097 sec
+TOTAL       :     8.509778 sec
 INFO: No Floating Point Exceptions have been reported
-    24,950,965,102      cycles                           #    2.990 GHz                    
-    79,109,236,780      instructions                     #    3.17  insn per cycle         
-       8.346055445 seconds time elapsed
+    24,989,263,511      cycles                           #    2.936 GHz                    
+    79,114,106,677      instructions                     #    3.17  insn per cycle         
+       8.513783620 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -129,15 +133,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.089881e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.103174e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.103174e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.954225e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.966789e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.966789e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060119e+00 +- 2.367901e+00 )  GeV^-4
-TOTAL       :     2.317816 sec
+TOTAL       :     2.363221 sec
 INFO: No Floating Point Exceptions have been reported
-     6,512,194,963      cycles                           #    2.805 GHz                    
-    20,270,944,427      instructions                     #    3.11  insn per cycle         
-       2.322212487 seconds time elapsed
+     6,520,268,594      cycles                           #    2.755 GHz                    
+    20,272,411,862      instructions                     #    3.11  insn per cycle         
+       2.367321549 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -158,15 +162,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.538805e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.544913e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.544913e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.578872e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.585242e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.585242e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     1.070841 sec
+TOTAL       :     1.043675 sec
 INFO: No Floating Point Exceptions have been reported
-     2,864,836,878      cycles                           #    2.667 GHz                    
-     7,066,173,206      instructions                     #    2.47  insn per cycle         
-       1.075040197 seconds time elapsed
+     2,864,987,448      cycles                           #    2.736 GHz                    
+     7,066,433,957      instructions                     #    2.47  insn per cycle         
+       1.047699347 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12058) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -187,15 +191,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.841038e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.849527e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.849527e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.792624e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.800903e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.800903e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     0.895722 sec
+TOTAL       :     0.919863 sec
 INFO: No Floating Point Exceptions have been reported
-     2,515,535,185      cycles                           #    2.798 GHz                    
-     6,403,562,449      instructions                     #    2.55  insn per cycle         
-       0.899557326 seconds time elapsed
+     2,518,456,002      cycles                           #    2.728 GHz                    
+     6,403,348,870      instructions                     #    2.54  insn per cycle         
+       0.924030112 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11026) (512y:   43) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -216,15 +220,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.475627e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.481124e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.481124e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.396708e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.401761e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.401761e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
-TOTAL       :     1.116628 sec
+TOTAL       :     1.179267 sec
 INFO: No Floating Point Exceptions have been reported
-     2,068,334,570      cycles                           #    1.847 GHz                    
-     3,303,479,670      instructions                     #    1.60  insn per cycle         
-       1.120666931 seconds time elapsed
+     2,067,882,758      cycles                           #    1.748 GHz                    
+     3,303,840,347      instructions                     #    1.60  insn per cycle         
+       1.183653607 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2591) (512y:   46) (512z: 9609)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt
index 498b2cd37c..53da48f73b 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 54s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_19:56:56
+DATE: 2024-08-29_22:54:42
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.966632e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.010698e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.016169e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.983038e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.030651e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.035775e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059596e+00 +- 2.368053e+00 )  GeV^-4
-TOTAL       :     0.489605 sec
+TOTAL       :     0.485658 sec
 INFO: No Floating Point Exceptions have been reported
-     2,010,594,089      cycles                           #    2.844 GHz                    
-     3,012,973,454      instructions                     #    1.50  insn per cycle         
-       0.767009476 seconds time elapsed
+     2,018,364,855      cycles                           #    2.868 GHz                    
+     3,042,795,140      instructions                     #    1.51  insn per cycle         
+       0.761253338 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.185325e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.243689e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.246525e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.194816e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.259321e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.262341e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
-TOTAL       :     1.784742 sec
+TOTAL       :     1.788438 sec
 INFO: No Floating Point Exceptions have been reported
-     6,010,360,971      cycles                           #    2.981 GHz                    
-    12,082,269,886      instructions                     #    2.01  insn per cycle         
-       2.072759359 seconds time elapsed
+     5,850,668,315      cycles                           #    2.899 GHz                    
+    12,627,354,962      instructions                     #    2.16  insn per cycle         
+       2.076625668 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.982152e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.983118e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.983118e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.930772e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.931709e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.931709e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
-TOTAL       :     8.279488 sec
+TOTAL       :     8.499103 sec
 INFO: No Floating Point Exceptions have been reported
-    24,906,847,273      cycles                           #    3.008 GHz                    
-    78,843,477,297      instructions                     #    3.17  insn per cycle         
-       8.283438125 seconds time elapsed
+    24,882,769,420      cycles                           #    2.927 GHz                    
+    78,843,195,491      instructions                     #    3.17  insn per cycle         
+       8.503048390 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.430488e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.444488e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.444488e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.188799e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.202194e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.202194e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060119e+00 +- 2.367901e+00 )  GeV^-4
-TOTAL       :     2.211830 sec
+TOTAL       :     2.286108 sec
 INFO: No Floating Point Exceptions have been reported
-     6,461,373,436      cycles                           #    2.917 GHz                    
-    20,229,460,939      instructions                     #    3.13  insn per cycle         
-       2.215383125 seconds time elapsed
+     6,462,798,604      cycles                           #    2.823 GHz                    
+    20,230,881,275      instructions                     #    3.13  insn per cycle         
+       2.290393769 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13497) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.546141e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.552346e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.552346e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.515511e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.521438e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.521438e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     1.065436 sec
+TOTAL       :     1.086872 sec
 INFO: No Floating Point Exceptions have been reported
-     2,970,223,700      cycles                           #    2.780 GHz                    
-     7,206,483,333      instructions                     #    2.43  insn per cycle         
-       1.069132793 seconds time elapsed
+     2,971,860,963      cycles                           #    2.725 GHz                    
+     7,206,762,027      instructions                     #    2.42  insn per cycle         
+       1.091113117 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12440) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.798890e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.807066e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.807066e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.734369e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.742330e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.742330e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     0.916539 sec
+TOTAL       :     0.950268 sec
 INFO: No Floating Point Exceptions have been reported
-     2,599,305,235      cycles                           #    2.826 GHz                    
-     6,544,414,590      instructions                     #    2.52  insn per cycle         
-       0.920171410 seconds time elapsed
+     2,601,308,056      cycles                           #    2.727 GHz                    
+     6,544,650,602      instructions                     #    2.52  insn per cycle         
+       0.954512299 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11454) (512y:   26) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.428262e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.433365e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.433365e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.341554e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.346314e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.346314e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
-TOTAL       :     1.153100 sec
+TOTAL       :     1.227523 sec
 INFO: No Floating Point Exceptions have been reported
-     2,140,036,710      cycles                           #    1.851 GHz                    
-     3,461,118,107      instructions                     #    1.62  insn per cycle         
-       1.156674320 seconds time elapsed
+     2,143,780,946      cycles                           #    1.742 GHz                    
+     3,461,620,552      instructions                     #    1.61  insn per cycle         
+       1.231794178 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3027) (512y:   25) (512z: 9681)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt
index dc9ca7a530..e4e84be90e 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 02m 26s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:12:32
+DATE: 2024-08-29_23:22:55
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.067673e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.110658e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.115133e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.043184e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.091633e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.097432e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059597e+00 +- 2.368053e+00 )  GeV^-4
-TOTAL       :     0.487879 sec
+TOTAL       :     0.492322 sec
 INFO: No Floating Point Exceptions have been reported
-     2,053,159,539      cycles                           #    2.919 GHz                    
-     3,075,135,999      instructions                     #    1.50  insn per cycle         
-       0.764389501 seconds time elapsed
+     2,017,019,452      cycles                           #    2.853 GHz                    
+     3,009,481,944      instructions                     #    1.49  insn per cycle         
+       0.767952594 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.681005e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.744501e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.747278e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.652929e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.728860e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.732304e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
-TOTAL       :     1.731074 sec
+TOTAL       :     1.733097 sec
 INFO: No Floating Point Exceptions have been reported
-     5,778,197,761      cycles                           #    2.951 GHz                    
-    12,437,674,784      instructions                     #    2.15  insn per cycle         
-       2.017655879 seconds time elapsed
+     5,734,163,299      cycles                           #    2.902 GHz                    
+    12,183,112,948      instructions                     #    2.12  insn per cycle         
+       2.035084293 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.722501e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.723307e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.723307e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.606445e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.607261e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.607261e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059969e+00 +- 2.367799e+00 )  GeV^-4
-TOTAL       :    28.664558 sec
+TOTAL       :    29.257397 sec
 INFO: No Floating Point Exceptions have been reported
-    85,759,268,786      cycles                           #    2.992 GHz                    
-   135,287,125,941      instructions                     #    1.58  insn per cycle         
-      28.668460894 seconds time elapsed
+    85,581,367,468      cycles                           #    2.925 GHz                    
+   135,289,170,958      instructions                     #    1.58  insn per cycle         
+      29.261520448 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:15198) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.988288e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.001222e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.001222e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.800616e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.812635e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.812635e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059962e+00 +- 2.367792e+00 )  GeV^-4
-TOTAL       :     2.351494 sec
+TOTAL       :     2.416640 sec
 INFO: No Floating Point Exceptions have been reported
-     6,754,834,567      cycles                           #    2.869 GHz                    
-    19,356,472,261      instructions                     #    2.87  insn per cycle         
-       2.355469886 seconds time elapsed
+     6,748,661,340      cycles                           #    2.789 GHz                    
+    19,356,380,273      instructions                     #    2.87  insn per cycle         
+       2.420837675 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:69590) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.466081e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.471571e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.471571e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.426073e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.431391e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.431391e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060903e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     1.123603 sec
+TOTAL       :     1.155116 sec
 INFO: No Floating Point Exceptions have been reported
-     3,163,501,117      cycles                           #    2.807 GHz                    
-     6,791,828,071      instructions                     #    2.15  insn per cycle         
-       1.127610138 seconds time elapsed
+     3,167,666,280      cycles                           #    2.734 GHz                    
+     6,792,057,786      instructions                     #    2.14  insn per cycle         
+       1.159384177 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:48998) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.760032e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.767850e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.767850e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.723429e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.731571e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.731571e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060903e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     0.936650 sec
+TOTAL       :     0.956806 sec
 INFO: No Floating Point Exceptions have been reported
-     2,623,882,438      cycles                           #    2.794 GHz                    
-     5,969,895,302      instructions                     #    2.28  insn per cycle         
-       0.940643059 seconds time elapsed
+     2,628,431,994      cycles                           #    2.738 GHz                    
+     5,970,160,001      instructions                     #    2.27  insn per cycle         
+       0.961004875 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:42589) (512y:   11) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.479077e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.484827e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.484827e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.388958e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.393857e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.393857e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060905e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     1.113882 sec
+TOTAL       :     1.186044 sec
 INFO: No Floating Point Exceptions have been reported
-     2,068,747,571      cycles                           #    1.851 GHz                    
-     3,493,400,176      instructions                     #    1.69  insn per cycle         
-       1.117954016 seconds time elapsed
+     2,074,683,359      cycles                           #    1.744 GHz                    
+     3,493,764,691      instructions                     #    1.68  insn per cycle         
+       1.190513991 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5186) (512y:    3) (512z:44834)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt
index df0f71d174..4a7fc1519c 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 02m 18s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_20:13:21
+DATE: 2024-08-29_23:23:45
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.128808e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.173626e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.178585e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.126041e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.174718e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.179833e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059597e+00 +- 2.368053e+00 )  GeV^-4
-TOTAL       :     0.487050 sec
+TOTAL       :     0.489322 sec
 INFO: No Floating Point Exceptions have been reported
-     2,067,516,202      cycles                           #    2.920 GHz                    
-     3,084,461,624      instructions                     #    1.49  insn per cycle         
-       0.767079444 seconds time elapsed
+     2,017,513,819      cycles                           #    2.863 GHz                    
+     2,950,795,242      instructions                     #    1.46  insn per cycle         
+       0.765967894 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.729947e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.794330e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.797099e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.715485e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.792937e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.796402e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
-TOTAL       :     1.715330 sec
+TOTAL       :     1.717851 sec
 INFO: No Floating Point Exceptions have been reported
-     5,790,416,249      cycles                           #    2.963 GHz                    
-    12,405,778,334      instructions                     #    2.14  insn per cycle         
-       2.012725573 seconds time elapsed
+     5,615,997,179      cycles                           #    2.878 GHz                    
+    11,675,413,905      instructions                     #    2.08  insn per cycle         
+       2.007827364 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.739276e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.740108e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.740108e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.589879e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.590664e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.590664e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059969e+00 +- 2.367799e+00 )  GeV^-4
-TOTAL       :    28.579010 sec
+TOTAL       :    29.343679 sec
 INFO: No Floating Point Exceptions have been reported
-    85,869,035,147      cycles                           #    3.005 GHz                    
-   135,713,098,525      instructions                     #    1.58  insn per cycle         
-      28.582934987 seconds time elapsed
+    85,677,631,333      cycles                           #    2.920 GHz                    
+   135,714,984,488      instructions                     #    1.58  insn per cycle         
+      29.347732882 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:15490) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.656997e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.668108e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.668108e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.717186e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.728871e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.728871e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059962e+00 +- 2.367792e+00 )  GeV^-4
-TOTAL       :     2.468183 sec
+TOTAL       :     2.446420 sec
 INFO: No Floating Point Exceptions have been reported
-     6,838,146,467      cycles                           #    2.767 GHz                    
-    19,407,163,330      instructions                     #    2.84  insn per cycle         
-       2.472172726 seconds time elapsed
+     6,827,807,387      cycles                           #    2.787 GHz                    
+    19,406,848,342      instructions                     #    2.84  insn per cycle         
+       2.450574027 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:69621) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.494743e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.500456e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.500456e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.447940e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.453395e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.453395e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060903e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     1.101868 sec
+TOTAL       :     1.137607 sec
 INFO: No Floating Point Exceptions have been reported
-     3,102,166,074      cycles                           #    2.807 GHz                    
-     6,715,779,639      instructions                     #    2.16  insn per cycle         
-       1.105919768 seconds time elapsed
+     3,109,396,020      cycles                           #    2.725 GHz                    
+     6,715,608,220      instructions                     #    2.16  insn per cycle         
+       1.141840390 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:47685) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.757205e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.764907e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.764907e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.708140e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.715722e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.715722e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060903e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     0.937783 sec
+TOTAL       :     0.965076 sec
 INFO: No Floating Point Exceptions have been reported
-     2,624,045,983      cycles                           #    2.788 GHz                    
-     5,968,641,196      instructions                     #    2.27  insn per cycle         
-       0.941620580 seconds time elapsed
+     2,632,093,230      cycles                           #    2.718 GHz                    
+     5,969,809,941      instructions                     #    2.27  insn per cycle         
+       0.969318362 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:41870) (512y:   13) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.475717e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.481089e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.481089e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.389945e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.395095e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.395095e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060905e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     1.116160 sec
+TOTAL       :     1.185082 sec
 INFO: No Floating Point Exceptions have been reported
-     2,072,491,943      cycles                           #    1.851 GHz                    
-     3,486,963,775      instructions                     #    1.68  insn per cycle         
-       1.120311238 seconds time elapsed
+     2,071,495,587      cycles                           #    1.743 GHz                    
+     3,487,122,412      instructions                     #    1.68  insn per cycle         
+       1.189465321 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4150) (512y:    4) (512z:44485)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inlL_hrd0.txt
new file mode 100644
index 0000000000..7fd81ba39f
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 33s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+DATE: 2024-08-30_00:43:28
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inlL_hrd0/check_cuda.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 4.121377e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.142849e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.145343e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 4.059597e+00 +- 2.368053e+00 )  GeV^-4
+TOTAL       :     0.509496 sec
+INFO: No Floating Point Exceptions have been reported
+     2,079,755,872      cycles                           #    2.863 GHz                    
+     3,172,417,833      instructions                     #    1.53  insn per cycle         
+       0.784399848 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inlL_hrd0/check_cuda.exe -p 64 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inlL_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 5.845295e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.878835e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.880342e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
+TOTAL       :     2.264393 sec
+INFO: No Floating Point Exceptions have been reported
+     7,258,060,700      cycles                           #    2.910 GHz                    
+    15,988,080,837      instructions                     #    2.20  insn per cycle         
+       2.552890986 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 6.626454e-04
+Avg ME (F77/GPU)   = 6.6262669187466816E-004
+Relative difference = 2.8232483515128973e-05
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.884084e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.884989e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.884989e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
+TOTAL       :     8.709970 sec
+INFO: No Floating Point Exceptions have been reported
+    25,569,512,744      cycles                           #    2.935 GHz                    
+    80,861,203,182      instructions                     #    3.16  insn per cycle         
+       8.714291659 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1813) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.627487e-04
+Avg ME (F77/C++)    = 6.6274865377109748E-004
+Relative difference = 6.975329041303311e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 6.727521e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.739435e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.739435e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.060120e+00 +- 2.367901e+00 )  GeV^-4
+TOTAL       :     2.442724 sec
+INFO: No Floating Point Exceptions have been reported
+     6,742,459,218      cycles                           #    2.756 GHz                    
+    20,806,148,618      instructions                     #    3.09  insn per cycle         
+       2.446854070 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 8759) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.627486e-04
+Avg ME (F77/C++)    = 6.6274862025858527E-004
+Relative difference = 3.056752633262151e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.541008e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.547280e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.547280e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
+TOTAL       :     1.069374 sec
+INFO: No Floating Point Exceptions have been reported
+     2,931,564,539      cycles                           #    2.732 GHz                    
+     7,262,291,877      instructions                     #    2.48  insn per cycle         
+       1.073515861 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8738) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.627194e-04
+Avg ME (F77/C++)    = 6.6271939544810393E-004
+Relative difference = 6.868511888963001e-09
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.722316e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.729816e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.729816e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
+TOTAL       :     0.957267 sec
+INFO: No Floating Point Exceptions have been reported
+     2,624,675,868      cycles                           #    2.732 GHz                    
+     6,668,764,468      instructions                     #    2.54  insn per cycle         
+       0.961477034 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8622) (512y:   48) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.627194e-04
+Avg ME (F77/C++)    = 6.6271939544810393E-004
+Relative difference = 6.868511888963001e-09
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.365151e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.370072e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.370072e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
+TOTAL       :     1.206541 sec
+INFO: No Floating Point Exceptions have been reported
+     2,120,518,134      cycles                           #    1.752 GHz                    
+     3,438,591,283      instructions                     #    1.62  insn per cycle         
+       1.210740203 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2700) (512y:   51) (512z: 7113)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.627195e-04
+Avg ME (F77/C++)    = 6.6271952146242791E-004
+Relative difference = 3.2385387578884745e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
index f906b484d1..5f8d97cb52 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 00s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_19:57:21
+DATE: 2024-08-29_22:55:08
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.456351e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.482973e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.485002e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.467119e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.493597e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.495917e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.527206 sec
+TOTAL       :     0.527200 sec
 INFO: No Floating Point Exceptions have been reported
-     2,263,706,765      cycles                           #    2.945 GHz                    
-     3,529,595,149      instructions                     #    1.56  insn per cycle         
-       0.828954022 seconds time elapsed
+     2,179,141,526      cycles                           #    2.863 GHz                    
+     3,401,942,513      instructions                     #    1.56  insn per cycle         
+       0.820608598 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.128784e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.158212e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.159533e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.126981e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.159542e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.160911e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.057239 sec
+TOTAL       :     3.036741 sec
 INFO: No Floating Point Exceptions have been reported
-     9,783,417,122      cycles                           #    2.925 GHz                    
-    13,211,264,053      instructions                     #    1.35  insn per cycle         
-       3.405402734 seconds time elapsed
+     9,557,865,433      cycles                           #    2.900 GHz                    
+    21,892,950,666      instructions                     #    2.29  insn per cycle         
+       3.354787545 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.903780e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.904695e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.904695e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.846810e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.847689e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.847689e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.621676 sec
+TOTAL       :     8.886925 sec
 INFO: No Floating Point Exceptions have been reported
-    25,964,721,381      cycles                           #    3.010 GHz                    
-    79,427,591,787      instructions                     #    3.06  insn per cycle         
-       8.626023484 seconds time elapsed
+    25,956,193,105      cycles                           #    2.920 GHz                    
+    79,426,766,928      instructions                     #    3.06  insn per cycle         
+       8.891107748 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 4776) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.603827e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.607327e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.607327e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.498157e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.501294e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.501294e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.557849 sec
+TOTAL       :     4.694558 sec
 INFO: No Floating Point Exceptions have been reported
-    12,814,190,735      cycles                           #    2.810 GHz                    
-    38,825,158,190      instructions                     #    3.03  insn per cycle         
-       4.561789335 seconds time elapsed
+    12,815,310,250      cycles                           #    2.728 GHz                    
+    38,825,821,027      instructions                     #    3.03  insn per cycle         
+       4.698934635 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:13172) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.224833e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.241665e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.241665e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.030362e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.046971e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.046971e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.000761 sec
+TOTAL       :     2.048463 sec
 INFO: No Floating Point Exceptions have been reported
-     5,588,116,210      cycles                           #    2.789 GHz                    
-    13,618,090,861      instructions                     #    2.44  insn per cycle         
-       2.004606328 seconds time elapsed
+     5,583,919,538      cycles                           #    2.721 GHz                    
+    13,616,921,209      instructions                     #    2.44  insn per cycle         
+       2.052769914 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11415) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.076409e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.097653e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.097653e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.166890e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.188415e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.188415e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.813694 sec
+TOTAL       :     1.795655 sec
 INFO: No Floating Point Exceptions have been reported
-     4,900,228,417      cycles                           #    2.697 GHz                    
-    12,298,153,916      instructions                     #    2.51  insn per cycle         
-       1.817598978 seconds time elapsed
+     4,900,555,717      cycles                           #    2.724 GHz                    
+    12,295,343,674      instructions                     #    2.51  insn per cycle         
+       1.799955144 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10319) (512y:   79) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.275673e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.288563e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.288563e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.882736e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.894897e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.894897e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.261390 sec
+TOTAL       :     2.389543 sec
 INFO: No Floating Point Exceptions have been reported
-     4,176,196,803      cycles                           #    1.844 GHz                    
-     6,391,790,037      instructions                     #    1.53  insn per cycle         
-       2.265279894 seconds time elapsed
+     4,173,275,375      cycles                           #    1.744 GHz                    
+     6,390,919,884      instructions                     #    1.53  insn per cycle         
+       2.394037924 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1957) (512y:   93) (512z: 9359)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt
index 965f537970..c03b5276d2 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 55s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-08_19:57:54
+DATE: 2024-08-29_22:55:41
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.478905e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.505299e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.507625e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.484238e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.511264e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.513583e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.523820 sec
+TOTAL       :     0.526008 sec
 INFO: No Floating Point Exceptions have been reported
-     2,217,657,303      cycles                           #    2.936 GHz                    
-     3,422,937,672      instructions                     #    1.54  insn per cycle         
-       0.814906080 seconds time elapsed
+     2,165,601,272      cycles                           #    2.860 GHz                    
+     3,419,597,650      instructions                     #    1.58  insn per cycle         
+       0.816374932 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.142523e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.171945e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.173230e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.140598e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.173249e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.174631e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.034284 sec
+TOTAL       :     3.029444 sec
 INFO: No Floating Point Exceptions have been reported
-     9,867,106,252      cycles                           #    2.970 GHz                    
-    19,377,940,372      instructions                     #    1.96  insn per cycle         
-       3.381320729 seconds time elapsed
+     9,525,068,079      cycles                           #    2.901 GHz                    
+    21,450,197,973      instructions                     #    2.25  insn per cycle         
+       3.341128233 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.898812e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.899704e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.899704e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.846603e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.847484e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.847484e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.643841 sec
+TOTAL       :     8.888642 sec
 INFO: No Floating Point Exceptions have been reported
-    26,013,311,554      cycles                           #    3.009 GHz                    
-    79,457,517,298      instructions                     #    3.05  insn per cycle         
-       8.647992970 seconds time elapsed
+    25,965,403,941      cycles                           #    2.920 GHz                    
+    79,452,311,631      instructions                     #    3.06  insn per cycle         
+       8.892834624 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 4432) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.611561e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.614888e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.614888e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.472255e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.475382e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.475382e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.547888 sec
+TOTAL       :     4.729321 sec
 INFO: No Floating Point Exceptions have been reported
-    12,837,773,076      cycles                           #    2.821 GHz                    
-    38,782,082,140      instructions                     #    3.02  insn per cycle         
-       4.551612597 seconds time elapsed
+    12,829,809,001      cycles                           #    2.711 GHz                    
+    38,779,302,159      instructions                     #    3.02  insn per cycle         
+       4.733575001 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:12934) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.352238e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.369622e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.369622e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.011768e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.027837e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.027837e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.970486 sec
+TOTAL       :     2.053124 sec
 INFO: No Floating Point Exceptions have been reported
-     5,585,325,981      cycles                           #    2.830 GHz                    
-    13,732,293,539      instructions                     #    2.46  insn per cycle         
-       1.974370273 seconds time elapsed
+     5,584,905,205      cycles                           #    2.716 GHz                    
+    13,731,123,403      instructions                     #    2.46  insn per cycle         
+       2.057449536 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11498) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.400061e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.421825e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.421825e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.018510e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.039054e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.039054e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.751328 sec
+TOTAL       :     1.824760 sec
 INFO: No Floating Point Exceptions have been reported
-     4,952,817,402      cycles                           #    2.822 GHz                    
-    12,422,492,733      instructions                     #    2.51  insn per cycle         
-       1.755554143 seconds time elapsed
+     4,960,617,330      cycles                           #    2.713 GHz                    
+    12,424,075,646      instructions                     #    2.50  insn per cycle         
+       1.829110632 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10310) (512y:  239) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.219259e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.232248e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.232248e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.840487e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.852650e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.852650e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.278823 sec
+TOTAL       :     2.403874 sec
 INFO: No Floating Point Exceptions have been reported
-     4,182,901,935      cycles                           #    1.833 GHz                    
-     6,495,418,480      instructions                     #    1.55  insn per cycle         
-       2.282695112 seconds time elapsed
+     4,178,528,364      cycles                           #    1.736 GHz                    
+     6,494,444,056      instructions                     #    1.55  insn per cycle         
+       2.408217411 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1780) (512y:  191) (512z: 9368)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inlL_hrd0.txt
new file mode 100644
index 0000000000..829ae6fed2
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 34s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+
+DATE: 2024-08-30_00:43:55
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inlL_hrd0/check_cuda.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 2.903948e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.923944e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.925978e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     0.544715 sec
+INFO: No Floating Point Exceptions have been reported
+     2,194,142,899      cycles                           #    2.853 GHz                    
+     3,401,253,801      instructions                     #    1.55  insn per cycle         
+       0.827967372 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inlL_hrd0/check_cuda.exe -p 64 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inlL_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 3.272178e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.292614e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.293474e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
+TOTAL       :     3.512492 sec
+INFO: No Floating Point Exceptions have been reported
+    10,946,047,367      cycles                           #    2.903 GHz                    
+    24,325,553,098      instructions                     #    2.22  insn per cycle         
+       3.826590611 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 6.626675e-04
+Avg ME (F77/GPU)   = 6.6266732376103494E-004
+Relative difference = 2.659538381540814e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.779171e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.779981e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.779981e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     9.224237 sec
+INFO: No Floating Point Exceptions have been reported
+    27,047,034,828      cycles                           #    2.931 GHz                    
+    82,690,917,418      instructions                     #    3.06  insn per cycle         
+       9.228401772 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 4164) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.626675e-04
+Avg ME (F77/C++)    = 6.6266731406016235E-004
+Relative difference = 2.8059296349552523e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.343521e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.346413e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.346413e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     4.911545 sec
+INFO: No Floating Point Exceptions have been reported
+    13,353,399,628      cycles                           #    2.717 GHz                    
+    39,763,868,313      instructions                     #    2.98  insn per cycle         
+       4.916005139 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 8197) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.626675e-04
+Avg ME (F77/C++)    = 6.6266730246908442E-004
+Relative difference = 2.98084507782618e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 7.712985e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.727919e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.727919e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     2.132583 sec
+INFO: No Floating Point Exceptions have been reported
+     5,759,796,239      cycles                           #    2.696 GHz                    
+    13,979,817,915      instructions                     #    2.43  insn per cycle         
+       2.136909323 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8119) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.626675e-04
+Avg ME (F77/C++)    = 6.6266730409276857E-004
+Relative difference = 2.956342832710188e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 8.892828e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.913112e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.913112e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     1.850723 sec
+INFO: No Floating Point Exceptions have been reported
+     5,075,349,901      cycles                           #    2.737 GHz                    
+    12,841,924,314      instructions                     #    2.53  insn per cycle         
+       1.855074437 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 7957) (512y:   79) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.626675e-04
+Avg ME (F77/C++)    = 6.6266730409276857E-004
+Relative difference = 2.956342832710188e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inlL_hrd0/check_cpp.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 6.722976e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.734551e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.734551e+03                 )  sec^-1
+MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
+TOTAL       :     2.446043 sec
+INFO: No Floating Point Exceptions have been reported
+     4,257,119,154      cycles                           #    1.738 GHz                    
+     6,661,754,960      instructions                     #    1.56  insn per cycle         
+       2.450378922 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2069) (512y:   93) (512z: 6880)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 6.626675e-04
+Avg ME (F77/C++)    = 6.6266730409276857E-004
+Relative difference = 2.956342832710188e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
index aed0b00b44..8fb9ffcb63 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
@@ -1,6 +1,6 @@
 
 ------------------------------------------------
-Preliminary build completed in 0d 00h 13m 11s
+Preliminary build completed in 0d 00h 14m 20s
 ------------------------------------------------
 
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
@@ -44,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-DATE: 2024-08-28_22:09:17
+DATE: 2024-08-29_22:57:33
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.063038e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.063437e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.063626e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.074025e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.074408e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.074613e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     2.552546 sec
+TOTAL       :     2.427313 sec
 INFO: No Floating Point Exceptions have been reported
-     7,969,059,552      cycles                           #    2.893 GHz                    
-    17,401,037,642      instructions                     #    2.18  insn per cycle         
-       2.954791685 seconds time elapsed
+     8,007,770,360      cycles                           #    2.905 GHz                    
+    17,844,373,075      instructions                     #    2.23  insn per cycle         
+       2.813382822 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.259402e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.261632e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.261912e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.214624e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.216736e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.217011e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     3.997932 sec
+TOTAL       :     4.008082 sec
 INFO: No Floating Point Exceptions have been reported
-    12,585,047,715      cycles                           #    2.912 GHz                    
-    29,739,266,768      instructions                     #    2.36  insn per cycle         
-       4.379850043 seconds time elapsed
+    12,658,170,825      cycles                           #    2.916 GHz                    
+    27,773,386,314      instructions                     #    2.19  insn per cycle         
+       4.398692801 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -101,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.083869e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.084097e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.084097e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.557310e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.557536e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.557536e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     6.536770 sec
+TOTAL       :     6.987063 sec
 INFO: No Floating Point Exceptions have been reported
-    18,925,108,902      cycles                           #    2.894 GHz                    
-    53,904,723,767      instructions                     #    2.85  insn per cycle         
-       6.540767069 seconds time elapsed
+    18,965,145,224      cycles                           #    2.713 GHz                    
+    53,902,719,260      instructions                     #    2.84  insn per cycle         
+       6.991136344 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:32425) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -130,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.537822e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.537916e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.537916e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.549425e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.549510e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.549510e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     3.434365 sec
+TOTAL       :     3.409407 sec
 INFO: No Floating Point Exceptions have been reported
-     9,964,139,970      cycles                           #    2.899 GHz                    
-    27,150,624,518      instructions                     #    2.72  insn per cycle         
-       3.438404316 seconds time elapsed
+     9,942,874,655      cycles                           #    2.914 GHz                    
+    27,152,194,685      instructions                     #    2.73  insn per cycle         
+       3.413395307 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:96499) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -159,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.336918e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.337312e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.337312e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.265590e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.265969e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.265969e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.583780 sec
+TOTAL       :     1.617957 sec
 INFO: No Floating Point Exceptions have been reported
-     4,293,557,962      cycles                           #    2.705 GHz                    
-     9,590,149,972      instructions                     #    2.23  insn per cycle         
-       1.587790520 seconds time elapsed
+     4,341,974,803      cycles                           #    2.678 GHz                    
+     9,590,139,782      instructions                     #    2.21  insn per cycle         
+       1.622179116 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84971) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -188,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.835352e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.836003e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.836003e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.848619e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.849166e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.849166e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.378567 sec
+TOTAL       :     1.373871 sec
 INFO: No Floating Point Exceptions have been reported
-     3,738,350,469      cycles                           #    2.705 GHz                    
-     8,514,195,736      instructions                     #    2.28  insn per cycle         
-       1.382567882 seconds time elapsed
+     3,731,717,521      cycles                           #    2.710 GHz                    
+     8,514,052,827      instructions                     #    2.28  insn per cycle         
+       1.377919646 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80619) (512y:   89) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -217,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.399596e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.400143e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.400143e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.389946e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.390460e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.390460e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.555703 sec
+TOTAL       :     1.559836 sec
 INFO: No Floating Point Exceptions have been reported
-     2,693,364,168      cycles                           #    1.728 GHz                    
-     4,280,800,287      instructions                     #    1.59  insn per cycle         
-       1.559932195 seconds time elapsed
+     2,691,203,106      cycles                           #    1.722 GHz                    
+     4,280,751,304      instructions                     #    1.59  insn per cycle         
+       1.564007681 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2852) (512y:  103) (512z:79119)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt
index e1baa342f4..ceb15640ef 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 53s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-DATE: 2024-08-08_20:20:33
+DATE: 2024-08-29_23:31:10
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.064923e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.065845e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.065845e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.070150e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.071117e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.071117e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     2.386081 sec
+TOTAL       :     2.392528 sec
 INFO: No Floating Point Exceptions have been reported
-     8,068,364,516      cycles                           #    2.980 GHz                    
-    18,499,320,498      instructions                     #    2.29  insn per cycle         
-       2.766222042 seconds time elapsed
+     7,866,237,206      cycles                           #    2.901 GHz                    
+    17,312,381,597      instructions                     #    2.20  insn per cycle         
+       2.769857787 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -79,15 +83,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.216459e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.248148e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.248148e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.184574e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.218458e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.218458e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     3.985205 sec
+TOTAL       :     3.992587 sec
 INFO: No Floating Point Exceptions have been reported
-    12,879,401,549      cycles                           #    2.982 GHz                    
-    28,276,545,925      instructions                     #    2.20  insn per cycle         
-       4.377652629 seconds time elapsed
+    12,563,689,959      cycles                           #    2.909 GHz                    
+    27,497,425,735      instructions                     #    2.19  insn per cycle         
+       4.373540406 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -110,15 +114,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.400950e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.401188e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.401188e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.140589e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.140841e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.140841e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     6.287943 sec
+TOTAL       :     6.489238 sec
 INFO: No Floating Point Exceptions have been reported
-    18,917,133,316      cycles                           #    3.007 GHz                    
-    53,900,822,413      instructions                     #    2.85  insn per cycle         
-       6.291810989 seconds time elapsed
+    18,905,197,683      cycles                           #    2.912 GHz                    
+    53,904,164,076      instructions                     #    2.85  insn per cycle         
+       6.493192099 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:32425) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.588454e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.588541e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.588541e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.554530e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.554616e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.554616e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     3.326167 sec
+TOTAL       :     3.398344 sec
 INFO: No Floating Point Exceptions have been reported
-     9,981,726,497      cycles                           #    2.998 GHz                    
-    27,151,411,979      instructions                     #    2.72  insn per cycle         
-       3.330120405 seconds time elapsed
+     9,911,477,696      cycles                           #    2.915 GHz                    
+    27,152,670,236      instructions                     #    2.74  insn per cycle         
+       3.402306403 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:96499) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -170,15 +174,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.463521e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.463922e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.463922e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.355915e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.356318e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.356318e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.526941 sec
+TOTAL       :     1.575190 sec
 INFO: No Floating Point Exceptions have been reported
-     4,301,902,923      cycles                           #    2.811 GHz                    
-     9,590,835,987      instructions                     #    2.23  insn per cycle         
-       1.530966019 seconds time elapsed
+     4,291,905,087      cycles                           #    2.719 GHz                    
+     9,591,555,035      instructions                     #    2.23  insn per cycle         
+       1.579167801 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84971) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.003469e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.004081e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.004081e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.851140e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.851660e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.851660e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.322167 sec
+TOTAL       :     1.373233 sec
 INFO: No Floating Point Exceptions have been reported
-     3,729,352,964      cycles                           #    2.814 GHz                    
-     8,515,368,436      instructions                     #    2.28  insn per cycle         
-       1.326036505 seconds time elapsed
+     3,746,937,682      cycles                           #    2.722 GHz                    
+     8,515,066,749      instructions                     #    2.27  insn per cycle         
+       1.377286559 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80619) (512y:   89) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -230,15 +234,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.565416e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.566063e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.566063e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.400870e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.401385e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.401385e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.483865 sec
+TOTAL       :     1.555325 sec
 INFO: No Floating Point Exceptions have been reported
-     2,695,897,083      cycles                           #    1.813 GHz                    
-     4,281,463,157      instructions                     #    1.59  insn per cycle         
-       1.487939257 seconds time elapsed
+     2,700,248,684      cycles                           #    1.732 GHz                    
+     4,281,718,976      instructions                     #    1.59  insn per cycle         
+       1.559584258 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2852) (512y:  103) (512z:79119)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt
index 618d256396..d1172cfd54 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 12m 16s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-DATE: 2024-08-08_20:00:52
+DATE: 2024-08-29_22:58:43
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.058227e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.058613e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.058749e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.066484e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.066965e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.067125e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     2.446864 sec
+TOTAL       :     2.430851 sec
 INFO: No Floating Point Exceptions have been reported
-     8,303,278,275      cycles                           #    3.000 GHz                    
-    18,645,596,525      instructions                     #    2.25  insn per cycle         
-       2.826809106 seconds time elapsed
+     7,996,520,980      cycles                           #    2.904 GHz                    
+    17,582,907,369      instructions                     #    2.20  insn per cycle         
+       2.809657895 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.233958e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.236030e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.236303e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.223670e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.225797e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.226061e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     4.007873 sec
+TOTAL       :     4.006618 sec
 INFO: No Floating Point Exceptions have been reported
-    12,910,025,920      cycles                           #    2.976 GHz                    
-    30,025,616,729      instructions                     #    2.33  insn per cycle         
-       4.392667162 seconds time elapsed
+    12,585,748,075      cycles                           #    2.903 GHz                    
+    29,987,504,897      instructions                     #    2.38  insn per cycle         
+       4.390914139 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.875983e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.876201e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.876201e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.591203e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.591448e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.591448e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     6.703762 sec
+TOTAL       :     6.959272 sec
 INFO: No Floating Point Exceptions have been reported
-    18,880,147,773      cycles                           #    2.815 GHz                    
-    53,931,698,860      instructions                     #    2.86  insn per cycle         
-       6.707560831 seconds time elapsed
+    18,846,992,549      cycles                           #    2.708 GHz                    
+    53,934,698,792      instructions                     #    2.86  insn per cycle         
+       6.963312781 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:32023) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.621951e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.622050e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.622050e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.545189e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.545273e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.545273e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     3.258110 sec
+TOTAL       :     3.418174 sec
 INFO: No Floating Point Exceptions have been reported
-     9,846,977,880      cycles                           #    3.019 GHz                    
-    27,128,812,737      instructions                     #    2.76  insn per cycle         
-       3.262446550 seconds time elapsed
+     9,927,542,575      cycles                           #    2.902 GHz                    
+    27,129,401,925      instructions                     #    2.73  insn per cycle         
+       3.422241980 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:96375) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.448151e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.448577e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.448577e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.373922e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.374372e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.374372e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.533013 sec
+TOTAL       :     1.566638 sec
 INFO: No Floating Point Exceptions have been reported
-     4,309,903,765      cycles                           #    2.805 GHz                    
-     9,584,249,957      instructions                     #    2.22  insn per cycle         
-       1.537048676 seconds time elapsed
+     4,248,483,872      cycles                           #    2.706 GHz                    
+     9,585,288,003      instructions                     #    2.26  insn per cycle         
+       1.570770072 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84978) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.985777e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.986306e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.986306e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.856631e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.857145e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.857145e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.327029 sec
+TOTAL       :     1.370752 sec
 INFO: No Floating Point Exceptions have been reported
-     3,743,360,462      cycles                           #    2.814 GHz                    
-     8,506,735,194      instructions                     #    2.27  insn per cycle         
-       1.330926412 seconds time elapsed
+     3,739,427,730      cycles                           #    2.721 GHz                    
+     8,506,975,276      instructions                     #    2.27  insn per cycle         
+       1.374833370 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80642) (512y:  239) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.581234e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.581805e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.581805e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.397332e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.397841e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.397841e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.477295 sec
+TOTAL       :     1.557042 sec
 INFO: No Floating Point Exceptions have been reported
-     2,699,035,749      cycles                           #    1.824 GHz                    
-     4,280,090,319      instructions                     #    1.59  insn per cycle         
-       1.480967463 seconds time elapsed
+     2,701,434,125      cycles                           #    1.731 GHz                    
+     4,281,085,594      instructions                     #    1.58  insn per cycle         
+       1.561215984 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2689) (512y:  185) (512z:79103)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
index b48700b97f..9c6dc2a875 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
@@ -1,6 +1,6 @@
 
 ------------------------------------------------
-Preliminary build completed in 0d 00h 07m 24s
+Preliminary build completed in 0d 00h 07m 12s
 ------------------------------------------------
 
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
@@ -44,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-DATE: 2024-08-28_22:24:33
+DATE: 2024-08-30_00:45:09
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.338149e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.338604e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.338867e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.337724e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.338199e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.338376e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     2.242693 sec
+TOTAL       :     2.243520 sec
 INFO: No Floating Point Exceptions have been reported
-     7,348,976,543      cycles                           #    2.902 GHz                    
-    16,466,315,526      instructions                     #    2.24  insn per cycle         
-       2.591057214 seconds time elapsed
+     7,333,011,251      cycles                           #    2.895 GHz                    
+    16,571,702,127      instructions                     #    2.26  insn per cycle         
+       2.591709636 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.503743e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.505652e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.505876e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.489870e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.491766e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.491994e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     4.295817 sec
+TOTAL       :     4.301800 sec
 INFO: No Floating Point Exceptions have been reported
-    13,394,338,134      cycles                           #    2.918 GHz                    
-    29,875,097,657      instructions                     #    2.23  insn per cycle         
-       4.645638907 seconds time elapsed
+    13,363,583,535      cycles                           #    2.902 GHz                    
+    29,144,223,391      instructions                     #    2.18  insn per cycle         
+       4.658949907 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inlL_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -101,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.428130e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.428322e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.428322e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.906043e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.906237e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.906237e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     7.109317 sec
+TOTAL       :     7.651431 sec
 INFO: No Floating Point Exceptions have been reported
-    20,650,921,982      cycles                           #    2.904 GHz                    
-    56,867,588,911      instructions                     #    2.75  insn per cycle         
-       7.113345899 seconds time elapsed
+    20,751,128,951      cycles                           #    2.711 GHz                    
+    56,872,829,581      instructions                     #    2.74  insn per cycle         
+       7.655490146 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:40993) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inlL_hrd0/runTest_cpp.exe
@@ -130,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.439600e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.439678e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.439678e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.446382e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.446458e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.446458e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     3.669607 sec
+TOTAL       :     3.652754 sec
 INFO: No Floating Point Exceptions have been reported
-    10,661,868,211      cycles                           #    2.903 GHz                    
-    28,187,072,418      instructions                     #    2.64  insn per cycle         
-       3.673685531 seconds time elapsed
+    10,654,723,744      cycles                           #    2.914 GHz                    
+    28,187,101,942      instructions                     #    2.65  insn per cycle         
+       3.656838406 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:100554) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inlL_hrd0/runTest_cpp.exe
@@ -159,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.055903e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.056245e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.056245e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.108239e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.108589e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.108589e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.729784 sec
+TOTAL       :     1.701013 sec
 INFO: No Floating Point Exceptions have been reported
-     4,640,645,280      cycles                           #    2.678 GHz                    
-     9,974,510,085      instructions                     #    2.15  insn per cycle         
-       1.733938549 seconds time elapsed
+     4,620,549,968      cycles                           #    2.711 GHz                    
+     9,973,294,591      instructions                     #    2.16  insn per cycle         
+       1.705009969 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:94205) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inlL_hrd0/runTest_cpp.exe
@@ -188,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.459662e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.460086e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.460086e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.478898e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.479341e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.479341e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.528240 sec
+TOTAL       :     1.518979 sec
 INFO: No Floating Point Exceptions have been reported
-     4,140,408,789      cycles                           #    2.703 GHz                    
-     9,072,597,595      instructions                     #    2.19  insn per cycle         
-       1.532357792 seconds time elapsed
+     4,109,801,969      cycles                           #    2.699 GHz                    
+     9,072,472,376      instructions                     #    2.21  insn per cycle         
+       1.523113813 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:94048) (512y:   91) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inlL_hrd0/runTest_cpp.exe
@@ -217,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.220979e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.221430e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.221430e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.226077e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.226527e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.226527e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.642045 sec
+TOTAL       :     1.639007 sec
 INFO: No Floating Point Exceptions have been reported
-     2,840,456,827      cycles                           #    1.726 GHz                    
-     4,549,279,782      instructions                     #    1.60  insn per cycle         
-       1.646333171 seconds time elapsed
+     2,831,072,496      cycles                           #    1.724 GHz                    
+     4,549,300,952      instructions                     #    1.61  insn per cycle         
+       1.643153604 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5078) (512y:  105) (512z:89892)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inlL_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
index b4fc180cc1..3869640735 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 13m 34s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-DATE: 2024-08-08_20:02:00
+DATE: 2024-08-29_22:59:52
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.298150e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.298890e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.299224e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.292937e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.293697e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.294129e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.186984e-05 +- 9.824899e-06 )  GeV^-6
-TOTAL       :     1.751662 sec
+TOTAL       :     1.743289 sec
 INFO: No Floating Point Exceptions have been reported
-     5,936,795,436      cycles                           #    2.952 GHz                    
-    12,013,270,651      instructions                     #    2.02  insn per cycle         
-       2.067502844 seconds time elapsed
+     5,790,347,981      cycles                           #    2.896 GHz                    
+    11,721,375,787      instructions                     #    2.02  insn per cycle         
+       2.057769051 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.155180e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.155800e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.155887e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.132817e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.133477e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.133612e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856829e-04 +- 8.333437e-05 )  GeV^-6
-TOTAL       :     2.055202 sec
+TOTAL       :     2.042100 sec
 INFO: No Floating Point Exceptions have been reported
-     6,915,039,139      cycles                           #    2.986 GHz                    
-    14,633,712,669      instructions                     #    2.12  insn per cycle         
-       2.372054868 seconds time elapsed
+     6,657,158,515      cycles                           #    2.894 GHz                    
+    13,520,154,293      instructions                     #    2.03  insn per cycle         
+       2.357691181 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.752648e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.752917e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.752917e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.397410e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.397670e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.397670e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825040e-06 )  GeV^-6
-TOTAL       :     6.035465 sec
+TOTAL       :     6.289899 sec
 INFO: No Floating Point Exceptions have been reported
-    18,171,458,820      cycles                           #    3.009 GHz                    
-    53,912,614,149      instructions                     #    2.97  insn per cycle         
-       6.039280806 seconds time elapsed
+    18,272,622,117      cycles                           #    2.905 GHz                    
+    53,912,319,440      instructions                     #    2.95  insn per cycle         
+       6.293853770 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:20142) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.468219e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.468626e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.468626e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.335991e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.336407e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.336407e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825037e-06 )  GeV^-6
-TOTAL       :     1.524160 sec
+TOTAL       :     1.584050 sec
 INFO: No Floating Point Exceptions have been reported
-     4,594,690,732      cycles                           #    3.008 GHz                    
-    13,806,361,271      instructions                     #    3.00  insn per cycle         
-       1.528090955 seconds time elapsed
+     4,590,954,775      cycles                           #    2.892 GHz                    
+    13,807,119,292      instructions                     #    3.01  insn per cycle         
+       1.588052809 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:97022) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.022651e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.024377e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.024377e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.716316e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.717989e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.717989e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.754295 sec
+TOTAL       :     0.788124 sec
 INFO: No Floating Point Exceptions have been reported
-     2,137,910,409      cycles                           #    2.822 GHz                    
-     4,835,783,841      instructions                     #    2.26  insn per cycle         
-       0.758250875 seconds time elapsed
+     2,144,103,788      cycles                           #    2.709 GHz                    
+     4,836,269,392      instructions                     #    2.26  insn per cycle         
+       0.792211489 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:85497) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.922130e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.924339e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.924339e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.724734e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.726965e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.726965e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.668838 sec
+TOTAL       :     0.685445 sec
 INFO: No Floating Point Exceptions have been reported
-     1,877,666,899      cycles                           #    2.793 GHz                    
-     4,290,021,460      instructions                     #    2.28  insn per cycle         
-       0.672738963 seconds time elapsed
+     1,870,591,030      cycles                           #    2.716 GHz                    
+     4,290,211,642      instructions                     #    2.29  insn per cycle         
+       0.689481731 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:81190) (512y:   44) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.249467e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.251538e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.251538e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.811640e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.813711e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.813711e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826771e-06 )  GeV^-6
-TOTAL       :     0.730439 sec
+TOTAL       :     0.777937 sec
 INFO: No Floating Point Exceptions have been reported
-     1,353,764,576      cycles                           #    1.845 GHz                    
-     2,161,505,151      instructions                     #    1.60  insn per cycle         
-       0.734391470 seconds time elapsed
+     1,355,244,615      cycles                           #    1.734 GHz                    
+     2,161,868,001      instructions                     #    1.60  insn per cycle         
+       0.782071569 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3469) (512y:   47) (512z:79334)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt
index 2973bcd9f9..5830ab0747 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 04s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-DATE: 2024-08-08_20:21:41
+DATE: 2024-08-29_23:32:19
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.303570e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.305124e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.305124e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.301417e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.303072e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.303072e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187093e-05 +- 9.825663e-06 )  GeV^-6
-TOTAL       :     1.683838 sec
+TOTAL       :     1.680148 sec
 INFO: No Floating Point Exceptions have been reported
-     5,740,674,837      cycles                           #    2.959 GHz                    
-    12,183,340,475      instructions                     #    2.12  insn per cycle         
-       1.996602458 seconds time elapsed
+     5,629,091,684      cycles                           #    2.907 GHz                    
+    12,087,888,534      instructions                     #    2.15  insn per cycle         
+       1.992324000 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -79,15 +83,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.128072e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.139024e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.139024e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.092652e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.104101e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.104101e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856440e-04 +- 8.331091e-05 )  GeV^-6
-TOTAL       :     2.036931 sec
+TOTAL       :     2.059612 sec
 INFO: No Floating Point Exceptions have been reported
-     6,817,978,012      cycles                           #    2.973 GHz                    
-    15,086,512,597      instructions                     #    2.21  insn per cycle         
-       2.349967443 seconds time elapsed
+     6,706,217,109      cycles                           #    2.898 GHz                    
+    14,832,815,446      instructions                     #    2.21  insn per cycle         
+       2.372868228 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -110,15 +114,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.676163e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.676428e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.676428e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.505841e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.506106e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.506106e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825040e-06 )  GeV^-6
-TOTAL       :     6.087276 sec
+TOTAL       :     6.209814 sec
 INFO: No Floating Point Exceptions have been reported
-    18,179,826,190      cycles                           #    2.985 GHz                    
-    53,910,247,266      instructions                     #    2.97  insn per cycle         
-       6.091212728 seconds time elapsed
+    18,140,361,227      cycles                           #    2.920 GHz                    
+    53,910,858,548      instructions                     #    2.97  insn per cycle         
+       6.213739946 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:20142) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.464690e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.465102e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.465102e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.372408e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.372872e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.372872e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825037e-06 )  GeV^-6
-TOTAL       :     1.525630 sec
+TOTAL       :     1.567410 sec
 INFO: No Floating Point Exceptions have been reported
-     4,590,585,740      cycles                           #    3.003 GHz                    
-    13,807,319,566      instructions                     #    3.01  insn per cycle         
-       1.529386769 seconds time elapsed
+     4,594,145,112      cycles                           #    2.927 GHz                    
+    13,808,050,244      instructions                     #    3.01  insn per cycle         
+       1.571423635 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:97022) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -170,15 +174,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.967974e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.969738e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.969738e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.731625e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.733291e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.733291e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.760104 sec
+TOTAL       :     0.786425 sec
 INFO: No Floating Point Exceptions have been reported
-     2,138,286,262      cycles                           #    2.802 GHz                    
-     4,837,282,487      instructions                     #    2.26  insn per cycle         
-       0.763970265 seconds time elapsed
+     2,145,796,069      cycles                           #    2.717 GHz                    
+     4,836,929,539      instructions                     #    2.25  insn per cycle         
+       0.790483054 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:85497) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.967332e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.969544e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.969544e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.692135e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.694759e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.694759e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.664857 sec
+TOTAL       :     0.688369 sec
 INFO: No Floating Point Exceptions have been reported
-     1,870,319,411      cycles                           #    2.799 GHz                    
-     4,291,006,476      instructions                     #    2.29  insn per cycle         
-       0.668734591 seconds time elapsed
+     1,874,357,075      cycles                           #    2.709 GHz                    
+     4,291,087,124      instructions                     #    2.29  insn per cycle         
+       0.692357424 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:81190) (512y:   44) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -230,15 +234,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.241242e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.243401e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.243401e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.794290e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.796316e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.796316e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826771e-06 )  GeV^-6
-TOTAL       :     0.731334 sec
+TOTAL       :     0.779484 sec
 INFO: No Floating Point Exceptions have been reported
-     1,357,966,074      cycles                           #    1.849 GHz                    
-     2,162,865,434      instructions                     #    1.59  insn per cycle         
-       0.735255583 seconds time elapsed
+     1,355,608,664      cycles                           #    1.732 GHz                    
+     2,162,825,766      instructions                     #    1.60  insn per cycle         
+       0.783659981 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3469) (512y:   47) (512z:79334)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt
index cfac3f719e..265acd58cc 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 11m 07s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-DATE: 2024-08-08_20:02:49
+DATE: 2024-08-29_23:00:42
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.289590e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.290901e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.291153e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.278308e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.279020e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.279441e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.186984e-05 +- 9.824899e-06 )  GeV^-6
-TOTAL       :     1.752222 sec
+TOTAL       :     1.741721 sec
 INFO: No Floating Point Exceptions have been reported
-     6,011,479,262      cycles                           #    2.988 GHz                    
-    11,822,786,435      instructions                     #    1.97  insn per cycle         
-       2.068235514 seconds time elapsed
+     5,810,428,962      cycles                           #    2.906 GHz                    
+    11,824,497,448      instructions                     #    2.04  insn per cycle         
+       2.055558992 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.118039e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.118627e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.118705e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.122942e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.123570e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.123692e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856829e-04 +- 8.333437e-05 )  GeV^-6
-TOTAL       :     2.087174 sec
+TOTAL       :     2.062523 sec
 INFO: No Floating Point Exceptions have been reported
-     7,020,765,748      cycles                           #    2.977 GHz                    
-    15,445,166,662      instructions                     #    2.20  insn per cycle         
-       2.414506634 seconds time elapsed
+     6,728,286,198      cycles                           #    2.901 GHz                    
+    14,206,939,018      instructions                     #    2.11  insn per cycle         
+       2.377928524 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.753426e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.753693e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.753693e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.479074e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.479333e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.479333e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825040e-06 )  GeV^-6
-TOTAL       :     6.033711 sec
+TOTAL       :     6.228099 sec
 INFO: No Floating Point Exceptions have been reported
-    18,095,249,979      cycles                           #    2.998 GHz                    
-    53,894,797,748      instructions                     #    2.98  insn per cycle         
-       6.037598164 seconds time elapsed
+    18,108,781,113      cycles                           #    2.907 GHz                    
+    53,894,957,040      instructions                     #    2.98  insn per cycle         
+       6.232064949 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:20142) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.476703e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.477111e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.477111e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.364519e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.364927e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.364927e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825037e-06 )  GeV^-6
-TOTAL       :     1.520725 sec
+TOTAL       :     1.571241 sec
 INFO: No Floating Point Exceptions have been reported
-     4,582,334,771      cycles                           #    3.007 GHz                    
-    13,799,523,503      instructions                     #    3.01  insn per cycle         
-       1.524516230 seconds time elapsed
+     4,583,763,877      cycles                           #    2.911 GHz                    
+    13,799,671,765      instructions                     #    3.01  insn per cycle         
+       1.575253406 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:96657) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.920572e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.922271e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.922271e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.683095e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.684725e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.684725e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.764164 sec
+TOTAL       :     0.791842 sec
 INFO: No Floating Point Exceptions have been reported
-     2,153,123,984      cycles                           #    2.806 GHz                    
-     4,840,163,805      instructions                     #    2.25  insn per cycle         
-       0.767980176 seconds time elapsed
+     2,153,132,055      cycles                           #    2.708 GHz                    
+     4,839,924,419      instructions                     #    2.25  insn per cycle         
+       0.795824411 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:85887) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.954158e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.956209e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.956209e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.585098e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.587181e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.587181e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.665841 sec
+TOTAL       :     0.698079 sec
 INFO: No Floating Point Exceptions have been reported
-     1,891,343,146      cycles                           #    2.826 GHz                    
-     4,293,658,543      instructions                     #    2.27  insn per cycle         
-       0.669786991 seconds time elapsed
+     1,896,280,008      cycles                           #    2.704 GHz                    
+     4,294,088,324      instructions                     #    2.26  insn per cycle         
+       0.702136523 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:81730) (512y:   24) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.171151e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.173263e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.173263e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.721862e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.723869e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.723869e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826771e-06 )  GeV^-6
-TOTAL       :     0.740474 sec
+TOTAL       :     0.787800 sec
 INFO: No Floating Point Exceptions have been reported
-     1,358,622,018      cycles                           #    1.827 GHz                    
-     2,168,397,288      instructions                     #    1.60  insn per cycle         
-       0.744609857 seconds time elapsed
+     1,358,400,068      cycles                           #    1.717 GHz                    
+     2,168,635,540      instructions                     #    1.60  insn per cycle         
+       0.791988289 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4082) (512y:   32) (512z:79555)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inlL_hrd0.txt
new file mode 100644
index 0000000000..3ee5482870
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 05m 39s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+DATE: 2024-08-30_00:46:21
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inlL_hrd0/check_cuda.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 8.162769e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.164045e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.164572e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.186984e-05 +- 9.824899e-06 )  GeV^-6
+TOTAL       :     1.401346 sec
+INFO: No Floating Point Exceptions have been reported
+     4,781,253,878      cycles                           #    2.898 GHz                    
+     9,985,478,194      instructions                     #    2.09  insn per cycle         
+       1.706656254 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inlL_hrd0/check_cuda.exe -p 1 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inlL_hrd0/check_cuda.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.808778e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.809257e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.809345e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.856829e-04 +- 8.333437e-05 )  GeV^-6
+TOTAL       :     2.263958 sec
+INFO: No Floating Point Exceptions have been reported
+     7,297,556,279      cycles                           #    2.903 GHz                    
+    16,048,771,042      instructions                     #    2.20  insn per cycle         
+       2.570062187 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 9.849634e-03
+Avg ME (F77/GPU)   = 9.8712451931303249E-003
+Relative difference = 0.002194111286807761
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 7.975071e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.975298e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.975298e+01                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825039e-06 )  GeV^-6
+TOTAL       :     6.622748 sec
+INFO: No Floating Point Exceptions have been reported
+    19,311,972,686      cycles                           #    2.915 GHz                    
+    55,952,164,962      instructions                     #    2.90  insn per cycle         
+       6.626608430 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:20764) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.847956e-03
+Avg ME (F77/C++)    = 9.8479555399647936E-003
+Relative difference = 4.6713775539776896e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.151349e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.151770e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.151770e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187012e-05 +- 9.825034e-06 )  GeV^-6
+TOTAL       :     1.677785 sec
+INFO: No Floating Point Exceptions have been reported
+     4,891,790,160      cycles                           #    2.910 GHz                    
+    14,322,008,076      instructions                     #    2.93  insn per cycle         
+       1.681751677 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:101128) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.847952e-03
+Avg ME (F77/C++)    = 9.8479518148152628E-003
+Relative difference = 1.8804390733861014e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 6.169107e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.170623e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.170623e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187185e-05 +- 9.826743e-06 )  GeV^-6
+TOTAL       :     0.858011 sec
+INFO: No Floating Point Exceptions have been reported
+     2,327,251,629      cycles                           #    2.701 GHz                    
+     5,055,690,064      instructions                     #    2.17  insn per cycle         
+       0.862299437 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:94671) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.892979e-03
+Avg ME (F77/C++)    = 9.8929794350922796E-003
+Relative difference = 4.3979905336188065e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 6.873385e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.875145e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.875145e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187185e-05 +- 9.826743e-06 )  GeV^-6
+TOTAL       :     0.770224 sec
+INFO: No Floating Point Exceptions have been reported
+     2,095,606,493      cycles                           #    2.709 GHz                    
+     4,578,630,372      instructions                     #    2.18  insn per cycle         
+       0.774468310 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:94567) (512y:   46) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.892979e-03
+Avg ME (F77/C++)    = 9.8929794350922796E-003
+Relative difference = 4.3979905336188065e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 6.406506e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.408455e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.408455e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187185e-05 +- 9.826742e-06 )  GeV^-6
+TOTAL       :     0.826705 sec
+INFO: No Floating Point Exceptions have been reported
+     1,434,474,016      cycles                           #    1.728 GHz                    
+     2,300,827,293      instructions                     #    1.60  insn per cycle         
+       0.830912379 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5686) (512y:   49) (512z:90066)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.892989e-03
+Avg ME (F77/C++)    = 9.8929887485386878E-003
+Relative difference = 2.541813316911761e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
index 30f43d1d54..51124f036b 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 14m 56s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-DATE: 2024-08-08_20:03:38
+DATE: 2024-08-29_23:01:32
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.679462e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.679946e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.680144e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.688985e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.689689e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.689968e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     2.195383 sec
+TOTAL       :     2.177420 sec
 INFO: No Floating Point Exceptions have been reported
-     7,438,879,261      cycles                           #    2.953 GHz                    
-    16,326,818,821      instructions                     #    2.19  insn per cycle         
-       2.577345674 seconds time elapsed
+     7,251,729,774      cycles                           #    2.900 GHz                    
+    16,346,188,279      instructions                     #    2.25  insn per cycle         
+       2.556548527 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.108202e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.108498e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.108526e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.113584e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.113894e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.113942e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     3.425728 sec
+TOTAL       :     3.408716 sec
 INFO: No Floating Point Exceptions have been reported
-    11,268,079,350      cycles                           #    3.003 GHz                    
-    26,526,619,371      instructions                     #    2.35  insn per cycle         
-       3.809078207 seconds time elapsed
+    10,895,818,319      cycles                           #    2.917 GHz                    
+    25,195,509,626      instructions                     #    2.31  insn per cycle         
+       3.791268467 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.696399e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.696636e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.696636e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.498375e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.498600e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.498600e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     6.867954 sec
+TOTAL       :     7.041519 sec
 INFO: No Floating Point Exceptions have been reported
-    19,211,187,371      cycles                           #    2.796 GHz                    
-    54,136,498,902      instructions                     #    2.82  insn per cycle         
-       6.871886606 seconds time elapsed
+    19,169,796,431      cycles                           #    2.721 GHz                    
+    54,133,266,233      instructions                     #    2.82  insn per cycle         
+       7.045435972 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:32001) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.599481e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.599571e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.599571e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.531964e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.532049e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.532049e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     3.303538 sec
+TOTAL       :     3.448576 sec
 INFO: No Floating Point Exceptions have been reported
-     9,333,906,777      cycles                           #    2.823 GHz                    
-    26,186,384,503      instructions                     #    2.81  insn per cycle         
-       3.307369825 seconds time elapsed
+     9,370,462,356      cycles                           #    2.715 GHz                    
+    26,186,953,076      instructions                     #    2.79  insn per cycle         
+       3.452660252 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:96048) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.642781e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.643249e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.643249e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.485181e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.485611e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.485611e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.453378 sec
+TOTAL       :     1.516415 sec
 INFO: No Floating Point Exceptions have been reported
-     4,089,405,470      cycles                           #    2.807 GHz                    
-     9,248,953,263      instructions                     #    2.26  insn per cycle         
-       1.457404649 seconds time elapsed
+     4,087,096,426      cycles                           #    2.689 GHz                    
+     9,248,408,142      instructions                     #    2.26  insn per cycle         
+       1.520508640 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84378) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.265363e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.265985e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.265985e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.041285e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.041878e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.041878e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.239836 sec
+TOTAL       :     1.309906 sec
 INFO: No Floating Point Exceptions have been reported
-     3,507,542,927      cycles                           #    2.822 GHz                    
-     8,182,646,854      instructions                     #    2.33  insn per cycle         
-       1.243760162 seconds time elapsed
+     3,507,609,505      cycles                           #    2.671 GHz                    
+     8,182,225,015      instructions                     #    2.33  insn per cycle         
+       1.313975216 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80003) (512y:   79) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.616663e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.617178e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.617178e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.418742e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.419249e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.419249e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.461797 sec
+TOTAL       :     1.546152 sec
 INFO: No Floating Point Exceptions have been reported
-     2,666,404,255      cycles                           #    1.820 GHz                    
-     4,171,669,153      instructions                     #    1.56  insn per cycle         
-       1.465874998 seconds time elapsed
+     2,666,659,514      cycles                           #    1.721 GHz                    
+     4,172,128,592      instructions                     #    1.56  insn per cycle         
+       1.550318622 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2589) (512y:   93) (512z:78909)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt
index 7b7d65b2d2..ff1435b3e2 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 12m 07s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-DATE: 2024-08-08_20:04:45
+DATE: 2024-08-29_23:02:41
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.675385e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.675879e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.676008e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.674691e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.675223e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.675466e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     2.190431 sec
+TOTAL       :     2.180460 sec
 INFO: No Floating Point Exceptions have been reported
-     7,517,385,120      cycles                           #    2.989 GHz                    
-    15,570,357,961      instructions                     #    2.07  insn per cycle         
-       2.571136488 seconds time elapsed
+     7,285,576,606      cycles                           #    2.911 GHz                    
+    16,299,901,303      instructions                     #    2.24  insn per cycle         
+       2.558450596 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.109468e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.109746e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.109778e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.109231e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.109533e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.109571e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     3.419906 sec
+TOTAL       :     3.406546 sec
 INFO: No Floating Point Exceptions have been reported
-    11,221,781,722      cycles                           #    2.994 GHz                    
-    24,236,211,120      instructions                     #    2.16  insn per cycle         
-       3.803243859 seconds time elapsed
+    10,890,129,022      cycles                           #    2.909 GHz                    
+    23,762,395,814      instructions                     #    2.18  insn per cycle         
+       3.800131530 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.902849e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.903107e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.903107e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.038908e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.039135e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.039135e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     6.673081 sec
+TOTAL       :     6.569694 sec
 INFO: No Floating Point Exceptions have been reported
-    19,149,429,604      cycles                           #    2.868 GHz                    
-    54,156,492,076      instructions                     #    2.83  insn per cycle         
-       6.676939828 seconds time elapsed
+    19,149,215,101      cycles                           #    2.913 GHz                    
+    54,156,003,574      instructions                     #    2.83  insn per cycle         
+       6.574054312 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:32203) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.571432e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.571520e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.571520e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.519464e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.519556e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.519556e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     3.363251 sec
+TOTAL       :     3.477621 sec
 INFO: No Floating Point Exceptions have been reported
-     9,398,223,848      cycles                           #    2.792 GHz                    
-    26,086,325,143      instructions                     #    2.78  insn per cycle         
-       3.367354553 seconds time elapsed
+     9,441,874,473      cycles                           #    2.713 GHz                    
+    26,086,760,463      instructions                     #    2.76  insn per cycle         
+       3.481656660 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:95937) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.625397e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.625854e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.625854e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.547459e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.547914e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.547914e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.456994 sec
+TOTAL       :     1.490795 sec
 INFO: No Floating Point Exceptions have been reported
-     4,075,335,135      cycles                           #    2.792 GHz                    
-     9,212,511,442      instructions                     #    2.26  insn per cycle         
-       1.460794766 seconds time elapsed
+     4,054,842,242      cycles                           #    2.714 GHz                    
+     9,212,719,836      instructions                     #    2.27  insn per cycle         
+       1.494856862 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:83852) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.243367e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.244047e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.244047e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.103065e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.103686e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.103686e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.245554 sec
+TOTAL       :     1.288731 sec
 INFO: No Floating Point Exceptions have been reported
-     3,512,150,002      cycles                           #    2.812 GHz                    
-     8,166,955,109      instructions                     #    2.33  insn per cycle         
-       1.249525029 seconds time elapsed
+     3,507,522,727      cycles                           #    2.715 GHz                    
+     8,167,227,176      instructions                     #    2.33  insn per cycle         
+       1.292784178 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:79409) (512y:  229) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.660094e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.660683e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.660683e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.475640e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.476182e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.476182e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.444444 sec
+TOTAL       :     1.520957 sec
 INFO: No Floating Point Exceptions have been reported
-     2,623,623,826      cycles                           #    1.812 GHz                    
-     4,166,476,704      instructions                     #    1.59  insn per cycle         
-       1.448438406 seconds time elapsed
+     2,623,180,222      cycles                           #    1.721 GHz                    
+     4,166,715,132      instructions                     #    1.59  insn per cycle         
+       1.525139188 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1853) (512y:  175) (512z:78883)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inlL_hrd0.txt
new file mode 100644
index 0000000000..4b558fc493
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 05m 55s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+
+DATE: 2024-08-30_00:47:12
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inlL_hrd0/check_cuda.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 5.330889e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.331572e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.331873e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
+TOTAL       :     1.922622 sec
+INFO: No Floating Point Exceptions have been reported
+     6,416,305,892      cycles                           #    2.901 GHz                    
+    14,096,228,194      instructions                     #    2.20  insn per cycle         
+       2.268828841 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inlL_hrd0/check_cuda.exe -p 1 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inlL_hrd0/check_cuda.exe -p 64 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.118335e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.118668e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.118708e+04                 )  sec^-1
+MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
+TOTAL       :     3.369964 sec
+INFO: No Floating Point Exceptions have been reported
+    10,644,765,911      cycles                           #    2.908 GHz                    
+    25,193,620,358      instructions                     #    2.37  insn per cycle         
+       3.717710359 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 9.872263e-03
+Avg ME (F77/GPU)   = 9.8722599015656533E-003
+Relative difference = 3.138524921691728e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 6.821826e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.822001e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.822001e+01                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
+TOTAL       :     7.740602 sec
+INFO: No Floating Point Exceptions have been reported
+    21,003,139,356      cycles                           #    2.712 GHz                    
+    57,101,681,472      instructions                     #    2.72  insn per cycle         
+       7.744639331 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:41064) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722595861831675E-003
+Relative difference = 3.457988134687711e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.388295e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.388365e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.388365e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
+TOTAL       :     3.804599 sec
+INFO: No Floating Point Exceptions have been reported
+    10,303,679,807      cycles                           #    2.706 GHz                    
+    27,329,219,876      instructions                     #    2.65  insn per cycle         
+       3.808665387 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:100597) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722594844308162E-003
+Relative difference = 3.5610570575237004e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.231770e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.232143e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.232143e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
+TOTAL       :     1.635935 sec
+INFO: No Floating Point Exceptions have been reported
+     4,448,171,118      cycles                           #    2.713 GHz                    
+     9,682,699,177      instructions                     #    2.18  insn per cycle         
+       1.640117935 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:94080) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722594324461913E-003
+Relative difference = 3.613714310412983e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.683420e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.684016e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.684016e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
+TOTAL       :     1.435500 sec
+INFO: No Floating Point Exceptions have been reported
+     3,902,457,276      cycles                           #    2.712 GHz                    
+     8,788,108,913      instructions                     #    2.25  insn per cycle         
+       1.439589443 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:93918) (512y:   79) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722594324461913E-003
+Relative difference = 3.613714310412983e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inlL_hrd0/check_cpp.exe -p 1 256 2 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.275101e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.275566e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.275566e+02                 )  sec^-1
+MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
+TOTAL       :     1.614838 sec
+INFO: No Floating Point Exceptions have been reported
+     2,775,300,488      cycles                           #    1.715 GHz                    
+     4,448,082,495      instructions                     #    1.60  insn per cycle         
+       1.618972309 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5089) (512y:   93) (512z:89854)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 9.872263e-03
+Avg ME (F77/C++)    = 9.8722594324461913E-003
+Relative difference = 3.613714310412983e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
index dc70f1aa96..8f58b467db 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 07s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-08_19:58:27
+DATE: 2024-08-29_22:56:15
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.793830e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.275665e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.618309e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.726362e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.262701e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.610286e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.446682 sec
+TOTAL       :     0.451987 sec
 INFO: No Floating Point Exceptions have been reported
-     1,973,218,669      cycles                           #    2.938 GHz                    
-     2,737,206,349      instructions                     #    1.39  insn per cycle         
-       0.728215190 seconds time elapsed
+     1,896,871,752      cycles                           #    2.848 GHz                    
+     2,681,389,264      instructions                     #    1.41  insn per cycle         
+       0.724279481 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.512201e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.215148e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.564113e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.348461e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.144104e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.549018e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
-TOTAL       :     0.528377 sec
+TOTAL       :     0.533005 sec
 INFO: No Floating Point Exceptions have been reported
-     2,273,295,859      cycles                           #    2.942 GHz                    
-     3,270,605,178      instructions                     #    1.44  insn per cycle         
-       0.829840488 seconds time elapsed
+     2,189,534,054      cycles                           #    2.846 GHz                    
+     3,087,782,694      instructions                     #    1.41  insn per cycle         
+       0.826717021 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.087919e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.111512e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.111512e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.051761e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.074255e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.074255e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     1.525836 sec
+TOTAL       :     1.577567 sec
 INFO: No Floating Point Exceptions have been reported
-     4,620,985,524      cycles                           #    3.021 GHz                    
-    13,191,789,695      instructions                     #    2.85  insn per cycle         
-       1.530034055 seconds time elapsed
+     4,620,045,988      cycles                           #    2.922 GHz                    
+    13,190,317,664      instructions                     #    2.86  insn per cycle         
+       1.581771455 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  707) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.913767e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.985469e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.985469e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.844349e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.914518e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.914518e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.875694 sec
+TOTAL       :     0.907812 sec
 INFO: No Floating Point Exceptions have been reported
-     2,645,390,944      cycles                           #    3.009 GHz                    
-     7,556,169,585      instructions                     #    2.86  insn per cycle         
-       0.879849311 seconds time elapsed
+     2,648,120,227      cycles                           #    2.907 GHz                    
+     7,555,054,987      instructions                     #    2.85  insn per cycle         
+       0.912056964 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.250464e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.457998e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.457998e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.046694e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.243521e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.243521e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.522755 sec
+TOTAL       :     0.557677 sec
 INFO: No Floating Point Exceptions have been reported
-     1,489,187,494      cycles                           #    2.830 GHz                    
-     3,159,085,018      instructions                     #    2.12  insn per cycle         
-       0.526770948 seconds time elapsed
+     1,493,865,953      cycles                           #    2.662 GHz                    
+     3,159,494,458      instructions                     #    2.11  insn per cycle         
+       0.561947750 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2984) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.609694e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.866945e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.866945e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.460167e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.708483e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.708483e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.473366 sec
+TOTAL       :     0.493228 sec
 INFO: No Floating Point Exceptions have been reported
-     1,347,276,225      cycles                           #    2.825 GHz                    
-     3,016,026,977      instructions                     #    2.24  insn per cycle         
-       0.477451794 seconds time elapsed
+     1,352,040,215      cycles                           #    2.721 GHz                    
+     3,014,341,429      instructions                     #    2.23  insn per cycle         
+       0.497512207 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2745) (512y:  104) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.459896e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.579821e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.579821e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.284889e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.393571e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.393571e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.687520 sec
+TOTAL       :     0.739242 sec
 INFO: No Floating Point Exceptions have been reported
-     1,326,541,553      cycles                           #    1.920 GHz                    
-     1,964,358,241      instructions                     #    1.48  insn per cycle         
-       0.691777094 seconds time elapsed
+     1,329,435,566      cycles                           #    1.790 GHz                    
+     1,962,380,777      instructions                     #    1.48  insn per cycle         
+       0.743503104 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1367) (512y:  106) (512z: 2217)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt
index 280fcce352..26b4e39e87 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 16s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-08_20:19:09
+DATE: 2024-08-29_23:29:44
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.684298e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.299204e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.299204e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.479417e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.184083e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.184083e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.471497 sec
+TOTAL       :     0.479274 sec
 INFO: No Floating Point Exceptions have been reported
-     2,016,663,667      cycles                           #    2.932 GHz                    
-     2,996,818,007      instructions                     #    1.49  insn per cycle         
-       0.744526851 seconds time elapsed
+     1,978,004,369      cycles                           #    2.865 GHz                    
+     2,955,147,787      instructions                     #    1.49  insn per cycle         
+       0.748924266 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -79,15 +83,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.407307e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.579683e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.579683e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.246218e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.372237e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.372237e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
-TOTAL       :     0.738495 sec
+TOTAL       :     0.755791 sec
 INFO: No Floating Point Exceptions have been reported
-     2,913,311,119      cycles                           #    2.959 GHz                    
-     4,473,148,579      instructions                     #    1.54  insn per cycle         
-       1.042109459 seconds time elapsed
+     2,914,728,097      cycles                           #    2.881 GHz                    
+     4,500,187,574      instructions                     #    1.54  insn per cycle         
+       1.069998281 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -110,15 +114,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.071825e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.094847e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.094847e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.050932e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.074087e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.074087e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     1.553859 sec
+TOTAL       :     1.586469 sec
 INFO: No Floating Point Exceptions have been reported
-     4,647,790,593      cycles                           #    2.984 GHz                    
-    13,197,257,990      instructions                     #    2.84  insn per cycle         
-       1.558215122 seconds time elapsed
+     4,664,946,821      cycles                           #    2.934 GHz                    
+    13,198,355,646      instructions                     #    2.83  insn per cycle         
+       1.590859708 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  707) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.902347e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.973784e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.973784e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.808190e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.878202e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.878202e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.886591 sec
+TOTAL       :     0.932993 sec
 INFO: No Floating Point Exceptions have been reported
-     2,676,044,915      cycles                           #    3.006 GHz                    
-     7,604,510,010      instructions                     #    2.84  insn per cycle         
-       0.890913281 seconds time elapsed
+     2,689,645,029      cycles                           #    2.872 GHz                    
+     7,602,533,127      instructions                     #    2.83  insn per cycle         
+       0.937357913 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -170,15 +174,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.212543e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.422665e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.422665e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.098794e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.305964e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.305964e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.536325 sec
+TOTAL       :     0.556740 sec
 INFO: No Floating Point Exceptions have been reported
-     1,528,484,723      cycles                           #    2.830 GHz                    
-     3,209,947,960      instructions                     #    2.10  insn per cycle         
-       0.540711031 seconds time elapsed
+     1,538,126,729      cycles                           #    2.743 GHz                    
+     3,210,272,712      instructions                     #    2.09  insn per cycle         
+       0.561227268 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2984) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.560716e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.811838e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.811838e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.454466e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.709491e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.709491e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.486090 sec
+TOTAL       :     0.502503 sec
 INFO: No Floating Point Exceptions have been reported
-     1,376,959,578      cycles                           #    2.811 GHz                    
-     3,063,340,210      instructions                     #    2.22  insn per cycle         
-       0.490411106 seconds time elapsed
+     1,390,826,167      cycles                           #    2.746 GHz                    
+     3,065,263,449      instructions                     #    2.20  insn per cycle         
+       0.507090204 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2745) (512y:  104) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -230,15 +234,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.438051e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.554379e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.554379e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.304175e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.415855e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.415855e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.699323 sec
+TOTAL       :     0.741021 sec
 INFO: No Floating Point Exceptions have been reported
-     1,353,225,054      cycles                           #    1.926 GHz                    
-     1,999,803,163      instructions                     #    1.48  insn per cycle         
-       0.703554082 seconds time elapsed
+     1,370,544,184      cycles                           #    1.840 GHz                    
+     2,002,405,410      instructions                     #    1.46  insn per cycle         
+       0.745593381 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1367) (512y:  106) (512z: 2217)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt
index 0801a72f2e..2a1fefe99c 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 48s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-08_19:58:40
+DATE: 2024-08-29_22:56:28
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.715940e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.160616e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.486831e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.712805e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.193466e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.523668e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.449924 sec
+TOTAL       :     0.449950 sec
 INFO: No Floating Point Exceptions have been reported
-     1,942,000,933      cycles                           #    2.932 GHz                    
-     2,723,193,332      instructions                     #    1.40  insn per cycle         
-       0.721112435 seconds time elapsed
+     1,900,965,307      cycles                           #    2.859 GHz                    
+     2,713,634,896      instructions                     #    1.43  insn per cycle         
+       0.722081118 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.484674e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.054198e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.395966e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.327514e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.030393e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.419287e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
-TOTAL       :     0.530941 sec
+TOTAL       :     0.533691 sec
 INFO: No Floating Point Exceptions have been reported
-     2,253,028,696      cycles                           #    2.947 GHz                    
-     3,232,782,518      instructions                     #    1.43  insn per cycle         
-       0.823488099 seconds time elapsed
+     2,202,998,596      cycles                           #    2.860 GHz                    
+     3,163,743,502      instructions                     #    1.44  insn per cycle         
+       0.827476712 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.055734e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.078647e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.078647e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.053904e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.076658e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.076658e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     1.572076 sec
+TOTAL       :     1.574324 sec
 INFO: No Floating Point Exceptions have been reported
-     4,625,532,940      cycles                           #    2.937 GHz                    
-    13,181,547,125      instructions                     #    2.85  insn per cycle         
-       1.575799334 seconds time elapsed
+     4,618,630,520      cycles                           #    2.927 GHz                    
+    13,179,133,059      instructions                     #    2.85  insn per cycle         
+       1.578415029 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  692) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.856450e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.926302e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.926302e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.827604e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.897483e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.897483e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.902285 sec
+TOTAL       :     0.915792 sec
 INFO: No Floating Point Exceptions have been reported
-     2,641,918,143      cycles                           #    2.918 GHz                    
-     7,554,356,585      instructions                     #    2.86  insn per cycle         
-       0.906092774 seconds time elapsed
+     2,649,406,486      cycles                           #    2.883 GHz                    
+     7,553,573,033      instructions                     #    2.85  insn per cycle         
+       0.920139852 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.249746e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.464508e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.464508e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.150256e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.355278e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.355278e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.523830 sec
+TOTAL       :     0.539604 sec
 INFO: No Floating Point Exceptions have been reported
-     1,491,771,401      cycles                           #    2.831 GHz                    
-     3,160,437,103      instructions                     #    2.12  insn per cycle         
-       0.527543251 seconds time elapsed
+     1,491,577,470      cycles                           #    2.746 GHz                    
+     3,158,610,313      instructions                     #    2.12  insn per cycle         
+       0.543883786 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2969) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.610049e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.870786e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.870786e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.497640e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.745857e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.745857e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.473152 sec
+TOTAL       :     0.487658 sec
 INFO: No Floating Point Exceptions have been reported
-     1,347,000,026      cycles                           #    2.829 GHz                    
-     3,012,563,261      instructions                     #    2.24  insn per cycle         
-       0.476761119 seconds time elapsed
+     1,345,211,794      cycles                           #    2.738 GHz                    
+     3,010,561,614      instructions                     #    2.24  insn per cycle         
+       0.491852014 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2719) (512y:  104) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.451125e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.569830e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.569830e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.325885e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.437267e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.437267e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.689809 sec
+TOTAL       :     0.726256 sec
 INFO: No Floating Point Exceptions have been reported
-     1,325,269,157      cycles                           #    1.912 GHz                    
-     1,962,212,225      instructions                     #    1.48  insn per cycle         
-       0.693734086 seconds time elapsed
+     1,325,529,479      cycles                           #    1.816 GHz                    
+     1,960,541,933      instructions                     #    1.48  insn per cycle         
+       0.730627916 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1344) (512y:  106) (512z: 2217)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inlL_hrd0.txt
new file mode 100644
index 0000000000..47e30addad
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 36s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+DATE: 2024-08-30_00:44:30
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inlL_hrd0/check_cuda.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.249665e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.633413e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.669474e+07                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     0.464207 sec
+INFO: No Floating Point Exceptions have been reported
+     1,943,953,798      cycles                           #    2.865 GHz                    
+     2,787,947,476      instructions                     #    1.43  insn per cycle         
+       0.736416924 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inlL_hrd0/check_cuda.exe -p 64 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 198
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inlL_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.460652e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.901529e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.929283e+07                 )  sec^-1
+MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
+TOTAL       :     0.589885 sec
+INFO: No Floating Point Exceptions have been reported
+     2,362,044,108      cycles                           #    2.850 GHz                    
+     3,579,530,494      instructions                     #    1.52  insn per cycle         
+       0.885343769 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 1.424749e-01
+Avg ME (F77/GPU)   = 0.14247482467490466
+Relative difference = 5.286902838873106e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.041005e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.063401e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.063401e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     1.593706 sec
+INFO: No Floating Point Exceptions have been reported
+     4,677,554,561      cycles                           #    2.929 GHz                    
+    13,235,044,374      instructions                     #    2.83  insn per cycle         
+       1.597979845 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  304) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247482467499481
+Relative difference = 5.286896511435107e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.837921e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.906205e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.906205e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     0.910803 sec
+INFO: No Floating Point Exceptions have been reported
+     2,673,804,224      cycles                           #    2.924 GHz                    
+     7,616,703,181      instructions                     #    2.85  insn per cycle         
+       0.915010405 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1031) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247482467499475
+Relative difference = 5.286896515331313e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.113630e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.314605e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.314605e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     0.545952 sec
+INFO: No Floating Point Exceptions have been reported
+     1,505,225,537      cycles                           #    2.739 GHz                    
+     3,151,104,621      instructions                     #    2.09  insn per cycle         
+       0.550258953 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1536) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247482467492589
+Relative difference = 5.286901348574438e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.508254e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.758619e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.758619e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     0.486223 sec
+INFO: No Floating Point Exceptions have been reported
+     1,347,084,245      cycles                           #    2.750 GHz                    
+     3,014,661,782      instructions                     #    2.24  insn per cycle         
+       0.490396435 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1372) (512y:  104) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247482467492589
+Relative difference = 5.286901348574438e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 2.322386e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.431592e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.431592e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     0.728001 sec
+INFO: No Floating Point Exceptions have been reported
+     1,326,618,429      cycles                           #    1.813 GHz                    
+     1,962,704,497      instructions                     #    1.48  insn per cycle         
+       0.732300007 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1315) (512y:  106) (512z:  899)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247482467492589
+Relative difference = 5.286901348574438e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
index 776a8e7cf2..f0eed2f4be 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 56s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-08_19:58:53
+DATE: 2024-08-29_22:56:42
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.177753e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.044280e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.137137e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.119421e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.036441e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.128814e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018174e+01 +- 1.429492e+01 )  GeV^-2
-TOTAL       :     0.446256 sec
+TOTAL       :     0.447469 sec
 INFO: No Floating Point Exceptions have been reported
-     1,967,028,633      cycles                           #    2.927 GHz                    
-     2,729,560,871      instructions                     #    1.39  insn per cycle         
-       0.730482007 seconds time elapsed
+     1,918,025,315      cycles                           #    2.861 GHz                    
+     2,686,229,018      instructions                     #    1.40  insn per cycle         
+       0.729916634 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 165
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.302708e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.525963e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.623999e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.929087e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.527322e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.625267e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.571360e+02 +- 2.114020e+02 )  GeV^-2
-TOTAL       :     0.480335 sec
+TOTAL       :     0.484458 sec
 INFO: No Floating Point Exceptions have been reported
-     2,062,608,643      cycles                           #    2.922 GHz                    
-     2,954,769,461      instructions                     #    1.43  insn per cycle         
-       0.763163038 seconds time elapsed
+     2,025,660,077      cycles                           #    2.857 GHz                    
+     2,912,943,658      instructions                     #    1.44  insn per cycle         
+       0.767688988 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.132642e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.159370e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.159370e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.094328e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.119456e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.119456e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     1.464071 sec
+TOTAL       :     1.515568 sec
 INFO: No Floating Point Exceptions have been reported
-     4,406,453,406      cycles                           #    3.003 GHz                    
-    12,951,424,799      instructions                     #    2.94  insn per cycle         
-       1.468164938 seconds time elapsed
+     4,405,562,836      cycles                           #    2.900 GHz                    
+    12,951,563,992      instructions                     #    2.94  insn per cycle         
+       1.519704133 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  645) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.856948e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.035260e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.035260e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.864259e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.042813e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.042813e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     0.590761 sec
+TOTAL       :     0.589275 sec
 INFO: No Floating Point Exceptions have been reported
-     1,725,972,010      cycles                           #    2.906 GHz                    
-     4,541,556,745      instructions                     #    2.63  insn per cycle         
-       0.594447330 seconds time elapsed
+     1,726,786,335      cycles                           #    2.913 GHz                    
+     4,541,721,153      instructions                     #    2.63  insn per cycle         
+       0.593457832 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3626) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.798317e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.520080e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.520080e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.646542e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.345345e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.345345e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.300105 sec
+TOTAL       :     0.307893 sec
 INFO: No Floating Point Exceptions have been reported
-       854,524,206      cycles                           #    2.821 GHz                    
-     1,917,397,512      instructions                     #    2.24  insn per cycle         
-       0.303595328 seconds time elapsed
+       854,858,999      cycles                           #    2.745 GHz                    
+     1,917,346,030      instructions                     #    2.24  insn per cycle         
+       0.312048006 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3566) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.187295e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.004492e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.004492e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.020064e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.829176e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.829176e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.282163 sec
+TOTAL       :     0.289972 sec
 INFO: No Floating Point Exceptions have been reported
-       807,334,376      cycles                           #    2.832 GHz                    
-     1,834,144,656      instructions                     #    2.27  insn per cycle         
-       0.285676418 seconds time elapsed
+       804,375,281      cycles                           #    2.741 GHz                    
+     1,834,366,299      instructions                     #    2.28  insn per cycle         
+       0.294106928 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3390) (512y:   22) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.697538e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.170455e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.170455e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.491631e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.939083e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.939083e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018829e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.368301 sec
+TOTAL       :     0.384694 sec
 INFO: No Floating Point Exceptions have been reported
-       729,603,114      cycles                           #    1.965 GHz                    
-     1,308,166,262      instructions                     #    1.79  insn per cycle         
-       0.371960958 seconds time elapsed
+       728,307,512      cycles                           #    1.876 GHz                    
+     1,308,382,279      instructions                     #    1.80  insn per cycle         
+       0.388823134 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1942) (512y:   26) (512z: 2432)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt
index e112255ddc..5605f32a61 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 03s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-08_20:19:22
+DATE: 2024-08-29_23:29:58
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +57,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.675417e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.135496e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.135496e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.460428e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.981655e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.981655e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.017654e+01 +- 1.429183e+01 )  GeV^-2
-TOTAL       :     0.454896 sec
+TOTAL       :     0.458045 sec
 INFO: No Floating Point Exceptions have been reported
-     1,922,075,239      cycles                           #    2.886 GHz                    
-     2,812,656,009      instructions                     #    1.46  insn per cycle         
-       0.723103268 seconds time elapsed
+     1,912,527,906      cycles                           #    2.861 GHz                    
+     2,832,677,934      instructions                     #    1.48  insn per cycle         
+       0.725530696 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -79,15 +83,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.230387e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.891837e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.891837e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.031344e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.571084e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.571084e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.609941e+02 +- 2.115589e+02 )  GeV^-2
-TOTAL       :     0.622542 sec
+TOTAL       :     0.641626 sec
 INFO: No Floating Point Exceptions have been reported
-     2,509,793,238      cycles                           #    2.945 GHz                    
-     3,839,626,015      instructions                     #    1.53  insn per cycle         
-       0.910444487 seconds time elapsed
+     2,455,324,664      cycles                           #    2.853 GHz                    
+     3,798,610,028      instructions                     #    1.55  insn per cycle         
+       0.929289203 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -110,15 +114,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.133555e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.159187e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.159187e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.100464e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.126356e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.126356e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     1.466168 sec
+TOTAL       :     1.510482 sec
 INFO: No Floating Point Exceptions have been reported
-     4,419,438,233      cycles                           #    3.007 GHz                    
-    12,955,838,618      instructions                     #    2.93  insn per cycle         
-       1.470344991 seconds time elapsed
+     4,428,031,735      cycles                           #    2.925 GHz                    
+    12,956,670,249      instructions                     #    2.93  insn per cycle         
+       1.514783541 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  645) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.929772e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.111984e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.111984e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.852778e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.034158e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.034158e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     0.580373 sec
+TOTAL       :     0.596524 sec
 INFO: No Floating Point Exceptions have been reported
-     1,747,268,230      cycles                           #    2.992 GHz                    
-     4,589,745,792      instructions                     #    2.63  insn per cycle         
-       0.584483983 seconds time elapsed
+     1,751,480,771      cycles                           #    2.918 GHz                    
+     4,589,869,030      instructions                     #    2.62  insn per cycle         
+       0.600870196 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3626) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -170,15 +174,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.766764e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.470194e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.470194e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.591986e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.306015e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.306015e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.305547 sec
+TOTAL       :     0.314944 sec
 INFO: No Floating Point Exceptions have been reported
-       873,235,026      cycles                           #    2.827 GHz                    
-     1,954,283,245      instructions                     #    2.24  insn per cycle         
-       0.309543568 seconds time elapsed
+       876,885,384      cycles                           #    2.752 GHz                    
+     1,954,408,231      instructions                     #    2.23  insn per cycle         
+       0.319132174 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3566) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -200,15 +204,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.204649e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.052966e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.052966e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.968412e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.771731e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.771731e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.285349 sec
+TOTAL       :     0.296726 sec
 INFO: No Floating Point Exceptions have been reported
-       822,856,149      cycles                           #    2.849 GHz                    
-     1,871,067,127      instructions                     #    2.27  insn per cycle         
-       0.289383401 seconds time elapsed
+       822,991,830      cycles                           #    2.740 GHz                    
+     1,870,983,532      instructions                     #    2.27  insn per cycle         
+       0.300923021 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3390) (512y:   22) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -230,15 +234,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.709235e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.178014e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.178014e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.457418e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.897878e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.897878e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018829e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.371559 sec
+TOTAL       :     0.391935 sec
 INFO: No Floating Point Exceptions have been reported
-       748,105,287      cycles                           #    1.994 GHz                    
-     1,349,627,266      instructions                     #    1.80  insn per cycle         
-       0.375758776 seconds time elapsed
+       751,588,906      cycles                           #    1.900 GHz                    
+     1,349,727,321      instructions                     #    1.80  insn per cycle         
+       0.396323799 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1942) (512y:   26) (512z: 2432)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt
index f4c5647b28..d040177026 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 48s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-08_19:59:05
+DATE: 2024-08-29_22:56:54
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.121935e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.045477e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.150621e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.127637e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.053180e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.153862e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018174e+01 +- 1.429492e+01 )  GeV^-2
-TOTAL       :     0.441822 sec
+TOTAL       :     0.446303 sec
 INFO: No Floating Point Exceptions have been reported
-     1,919,824,453      cycles                           #    2.925 GHz                    
-     2,711,548,396      instructions                     #    1.41  insn per cycle         
-       0.712257308 seconds time elapsed
+     1,892,017,092      cycles                           #    2.852 GHz                    
+     2,646,562,805      instructions                     #    1.40  insn per cycle         
+       0.720494676 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 164
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.453927e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.579708e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.670884e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.004070e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.568435e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.672294e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.571360e+02 +- 2.114020e+02 )  GeV^-2
-TOTAL       :     0.482328 sec
+TOTAL       :     0.485987 sec
 INFO: No Floating Point Exceptions have been reported
-     2,075,215,740      cycles                           #    2.939 GHz                    
-     2,958,576,913      instructions                     #    1.43  insn per cycle         
-       0.765173729 seconds time elapsed
+     2,027,231,831      cycles                           #    2.852 GHz                    
+     2,888,437,663      instructions                     #    1.42  insn per cycle         
+       0.769793031 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.138812e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.164706e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.164706e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.096156e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.120921e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.120921e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     1.455800 sec
+TOTAL       :     1.512692 sec
 INFO: No Floating Point Exceptions have been reported
-     4,403,258,677      cycles                           #    3.018 GHz                    
-    12,926,930,475      instructions                     #    2.94  insn per cycle         
-       1.459744309 seconds time elapsed
+     4,411,886,190      cycles                           #    2.910 GHz                    
+    12,928,184,190      instructions                     #    2.93  insn per cycle         
+       1.516753339 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  630) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.936303e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.120025e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.120025e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.878836e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.060923e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.060923e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     0.574725 sec
+TOTAL       :     0.585977 sec
 INFO: No Floating Point Exceptions have been reported
-     1,726,777,095      cycles                           #    2.987 GHz                    
-     4,536,166,658      instructions                     #    2.63  insn per cycle         
-       0.578775017 seconds time elapsed
+     1,726,387,164      cycles                           #    2.928 GHz                    
+     4,536,203,734      instructions                     #    2.63  insn per cycle         
+       0.590186867 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3610) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.813817e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.547021e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.547021e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.660034e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.358223e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.358223e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.298922 sec
+TOTAL       :     0.307186 sec
 INFO: No Floating Point Exceptions have been reported
-       857,389,967      cycles                           #    2.838 GHz                    
-     1,914,305,415      instructions                     #    2.23  insn per cycle         
-       0.302780018 seconds time elapsed
+       856,403,810      cycles                           #    2.756 GHz                    
+     1,914,283,667      instructions                     #    2.24  insn per cycle         
+       0.311393558 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3536) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.307694e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.166095e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.166095e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.036262e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.837462e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.837462e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.276778 sec
+TOTAL       :     0.289093 sec
 INFO: No Floating Point Exceptions have been reported
-       801,815,801      cycles                           #    2.863 GHz                    
-     1,829,952,798      instructions                     #    2.28  insn per cycle         
-       0.280644988 seconds time elapsed
+       803,624,213      cycles                           #    2.746 GHz                    
+     1,830,203,476      instructions                     #    2.28  insn per cycle         
+       0.293218472 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3354) (512y:   22) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.668444e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.134327e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.134327e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.513089e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.950488e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.950488e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018829e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.370402 sec
+TOTAL       :     0.383052 sec
 INFO: No Floating Point Exceptions have been reported
-       727,659,849      cycles                           #    1.947 GHz                    
-     1,306,194,061      instructions                     #    1.80  insn per cycle         
-       0.374419699 seconds time elapsed
+       728,226,687      cycles                           #    1.884 GHz                    
+     1,306,112,916      instructions                     #    1.79  insn per cycle         
+       0.387247657 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1905) (512y:   26) (512z: 2435)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inlL_hrd0.txt
new file mode 100644
index 0000000000..daf3b7ead8
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 26s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+DATE: 2024-08-30_00:44:43
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inlL_hrd0/check_cuda.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 2.615081e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.587850e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.712339e+07                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018174e+01 +- 1.429492e+01 )  GeV^-2
+TOTAL       :     0.451322 sec
+INFO: No Floating Point Exceptions have been reported
+     1,917,039,404      cycles                           #    2.847 GHz                    
+     2,701,931,216      instructions                     #    1.41  insn per cycle         
+       0.730405757 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inlL_hrd0/check_cuda.exe -p 64 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 115
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inlL_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 2.964707e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.911517e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.975518e+07                 )  sec^-1
+MeanMatrixElemValue         = ( 2.571360e+02 +- 2.114020e+02 )  GeV^-2
+TOTAL       :     0.517121 sec
+INFO: No Floating Point Exceptions have been reported
+     2,143,553,399      cycles                           #    2.858 GHz                    
+     3,138,064,658      instructions                     #    1.46  insn per cycle         
+       0.808917445 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 1.424226e-01
+Avg ME (F77/GPU)   = 0.14247487904342115
+Relative difference = 0.0003670698570391495
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.076493e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.100412e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.100412e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
+TOTAL       :     1.540261 sec
+INFO: No Floating Point Exceptions have been reported
+     4,493,879,750      cycles                           #    2.911 GHz                    
+    13,101,028,096      instructions                     #    2.92  insn per cycle         
+       1.544473248 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  272) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424686e-01
+Avg ME (F77/C++)    = 0.14246861273707090
+Relative difference = 8.94026536232867e-08
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 2.851767e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.030226e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.030226e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
+TOTAL       :     0.591973 sec
+INFO: No Floating Point Exceptions have been reported
+     1,734,526,321      cycles                           #    2.913 GHz                    
+     4,562,505,904      instructions                     #    2.63  insn per cycle         
+       0.596084732 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1510) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424686e-01
+Avg ME (F77/C++)    = 0.14246862329122401
+Relative difference = 1.6348320966878032e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 5.620425e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.302186e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.302186e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
+TOTAL       :     0.309262 sec
+INFO: No Floating Point Exceptions have been reported
+       856,939,356      cycles                           #    2.741 GHz                    
+     1,911,443,740      instructions                     #    2.23  insn per cycle         
+       0.313320866 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2090) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247491543012991
+Relative difference = 1.0830068962165901e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 6.080332e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.885290e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.885290e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
+TOTAL       :     0.287179 sec
+INFO: No Floating Point Exceptions have been reported
+       807,094,694      cycles                           #    2.777 GHz                    
+     1,838,102,061      instructions                     #    2.28  insn per cycle         
+       0.291236695 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2001) (512y:   22) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247491543012991
+Relative difference = 1.0830068962165901e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = FLOAT (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 4.459210e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.894713e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.894713e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018829e+01 +- 1.429922e+01 )  GeV^-2
+TOTAL       :     0.387278 sec
+INFO: No Floating Point Exceptions have been reported
+       734,145,292      cycles                           #    1.878 GHz                    
+     1,310,957,278      instructions                     #    1.79  insn per cycle         
+       0.391579518 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1885) (512y:   26) (512z: 1099)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247491576758442
+Relative difference = 1.1066920862943416e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
index 14cf46cbcc..7e1b4e6534 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 56s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-08_19:59:17
+DATE: 2024-08-29_22:57:06
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.769849e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.334726e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.696577e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.749807e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.308121e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.661112e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.447945 sec
+TOTAL       :     0.450446 sec
 INFO: No Floating Point Exceptions have been reported
-     1,970,077,649      cycles                           #    2.938 GHz                    
-     2,764,650,199      instructions                     #    1.40  insn per cycle         
-       0.727384144 seconds time elapsed
+     1,888,302,564      cycles                           #    2.834 GHz                    
+     2,667,402,552      instructions                     #    1.41  insn per cycle         
+       0.723703520 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.502555e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.204679e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.563131e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.346796e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.137623e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.549952e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
-TOTAL       :     0.530343 sec
+TOTAL       :     0.534520 sec
 INFO: No Floating Point Exceptions have been reported
-     2,259,914,656      cycles                           #    2.930 GHz                    
-     3,250,253,432      instructions                     #    1.44  insn per cycle         
-       0.828686428 seconds time elapsed
+     2,203,657,514      cycles                           #    2.855 GHz                    
+     3,143,925,936      instructions                     #    1.43  insn per cycle         
+       0.829949035 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.069358e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.092261e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.092261e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.051238e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.073958e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.073958e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     1.552012 sec
+TOTAL       :     1.578696 sec
 INFO: No Floating Point Exceptions have been reported
-     4,641,202,069      cycles                           #    2.985 GHz                    
-    13,179,687,646      instructions                     #    2.84  insn per cycle         
-       1.555810770 seconds time elapsed
+     4,641,612,180      cycles                           #    2.934 GHz                    
+    13,177,949,346      instructions                     #    2.84  insn per cycle         
+       1.582867704 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  681) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.876933e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.946940e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.946940e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.853038e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.923408e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.923408e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.892460 sec
+TOTAL       :     0.903441 sec
 INFO: No Floating Point Exceptions have been reported
-     2,644,592,448      cycles                           #    2.953 GHz                    
-     7,475,728,591      instructions                     #    2.83  insn per cycle         
-       0.896244087 seconds time elapsed
+     2,654,124,047      cycles                           #    2.927 GHz                    
+     7,474,562,151      instructions                     #    2.82  insn per cycle         
+       0.907731665 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3152) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.303870e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.519584e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.519584e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.181787e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.395557e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.395557e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.515449 sec
+TOTAL       :     0.534479 sec
 INFO: No Floating Point Exceptions have been reported
-     1,473,674,467      cycles                           #    2.841 GHz                    
-     3,129,036,980      instructions                     #    2.12  insn per cycle         
-       0.519216773 seconds time elapsed
+     1,477,170,124      cycles                           #    2.745 GHz                    
+     3,127,314,564      instructions                     #    2.12  insn per cycle         
+       0.538817015 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3119) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.630465e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.893768e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.893768e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.518635e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.785177e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.785177e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.471176 sec
+TOTAL       :     0.485514 sec
 INFO: No Floating Point Exceptions have been reported
-     1,324,066,570      cycles                           #    2.791 GHz                    
-     2,982,910,932      instructions                     #    2.25  insn per cycle         
-       0.474943404 seconds time elapsed
+     1,324,922,733      cycles                           #    2.709 GHz                    
+     2,981,261,784      instructions                     #    2.25  insn per cycle         
+       0.489729459 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2881) (512y:  110) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.354541e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.462714e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.462714e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.248846e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.351092e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.351092e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.717844 sec
+TOTAL       :     0.750272 sec
 INFO: No Floating Point Exceptions have been reported
-     1,364,512,931      cycles                           #    1.893 GHz                    
-     1,991,624,740      instructions                     #    1.46  insn per cycle         
-       0.721728207 seconds time elapsed
+     1,367,223,495      cycles                           #    1.814 GHz                    
+     1,990,069,192      instructions                     #    1.46  insn per cycle         
+       0.754512761 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1656) (512y:  108) (512z: 2251)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt
index 5b20c017bf..a62de088c9 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 50s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-08_19:59:31
+DATE: 2024-08-29_22:57:19
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.764426e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.211229e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.545216e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.682090e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.100032e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.454090e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.450206 sec
+TOTAL       :     0.453456 sec
 INFO: No Floating Point Exceptions have been reported
-     1,949,946,468      cycles                           #    2.935 GHz                    
-     2,761,346,859      instructions                     #    1.42  insn per cycle         
-       0.722536101 seconds time elapsed
+     1,900,785,946      cycles                           #    2.849 GHz                    
+     2,696,170,354      instructions                     #    1.42  insn per cycle         
+       0.725943157 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.478869e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.028008e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.358881e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.329235e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.987947e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.373459e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
-TOTAL       :     0.526742 sec
+TOTAL       :     0.531321 sec
 INFO: No Floating Point Exceptions have been reported
-     2,265,443,315      cycles                           #    2.945 GHz                    
-     3,237,723,769      instructions                     #    1.43  insn per cycle         
-       0.826628143 seconds time elapsed
+     2,203,065,418      cycles                           #    2.864 GHz                    
+     3,159,908,120      instructions                     #    1.43  insn per cycle         
+       0.826130785 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.082497e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.105654e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.105654e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.034563e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.056930e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.056930e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     1.532875 sec
+TOTAL       :     1.603724 sec
 INFO: No Floating Point Exceptions have been reported
-     4,647,233,937      cycles                           #    3.025 GHz                    
-    13,168,093,251      instructions                     #    2.83  insn per cycle         
-       1.537009895 seconds time elapsed
+     4,646,351,391      cycles                           #    2.891 GHz                    
+    13,165,982,220      instructions                     #    2.83  insn per cycle         
+       1.607855644 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  666) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.916408e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.986697e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.986697e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.860303e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.930543e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.930543e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.873749 sec
+TOTAL       :     0.899725 sec
 INFO: No Floating Point Exceptions have been reported
-     2,638,584,974      cycles                           #    3.010 GHz                    
-     7,477,829,189      instructions                     #    2.83  insn per cycle         
-       0.877352084 seconds time elapsed
+     2,640,401,361      cycles                           #    2.923 GHz                    
+     7,476,357,327      instructions                     #    2.83  insn per cycle         
+       0.904022044 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3141) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.313421e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.533027e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.533027e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.187408e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.395650e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.395650e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.513511 sec
+TOTAL       :     0.533442 sec
 INFO: No Floating Point Exceptions have been reported
-     1,473,425,351      cycles                           #    2.852 GHz                    
-     3,129,237,400      instructions                     #    2.12  insn per cycle         
-       0.517237290 seconds time elapsed
+     1,473,667,938      cycles                           #    2.744 GHz                    
+     3,127,923,561      instructions                     #    2.12  insn per cycle         
+       0.537621751 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3097) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.703540e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.984962e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.984962e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.577590e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.837843e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.837843e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.461287 sec
+TOTAL       :     0.477166 sec
 INFO: No Floating Point Exceptions have been reported
-     1,320,825,681      cycles                           #    2.850 GHz                    
-     2,983,955,617      instructions                     #    2.26  insn per cycle         
-       0.465038534 seconds time elapsed
+     1,321,328,150      cycles                           #    2.748 GHz                    
+     2,982,033,810      instructions                     #    2.26  insn per cycle         
+       0.481493993 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2857) (512y:  110) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.367399e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.477116e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.477116e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.247156e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.351150e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.351150e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.713600 sec
+TOTAL       :     0.750754 sec
 INFO: No Floating Point Exceptions have been reported
-     1,364,189,990      cycles                           #    1.903 GHz                    
-     1,991,688,961      instructions                     #    1.46  insn per cycle         
-       0.717422383 seconds time elapsed
+     1,365,868,869      cycles                           #    1.811 GHz                    
+     1,990,498,794      instructions                     #    1.46  insn per cycle         
+       0.754982203 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1632) (512y:  108) (512z: 2251)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inlL_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inlL_hrd0.txt
new file mode 100644
index 0000000000..c798b3334f
--- /dev/null
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inlL_hrd0.txt
@@ -0,0 +1,244 @@
+
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 27s
+------------------------------------------------
+
+Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
+BACKEND=cpp512y (was cppauto)
+OMPFLAGS=
+FPTYPE='d'
+HELINL='0'
+HRDCOD='0'
+HASCURAND=hasCurand
+HASHIPRAND=hasNoHiprand
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
+make: Nothing to be done for 'gtestlibs'.
+
+make USEBUILDDIR=1 BACKEND=cuda
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cppnone
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cppsse4
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cppavx2
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+make USEBUILDDIR=1 BACKEND=cpp512z
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+
+DATE: 2024-08-30_00:44:55
+
+On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inlL_hrd0/check_cuda.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.250929e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.635726e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.670682e+07                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     0.466193 sec
+INFO: No Floating Point Exceptions have been reported
+     1,934,740,327      cycles                           #    2.845 GHz                    
+     2,796,573,515      instructions                     #    1.45  insn per cycle         
+       0.738016021 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inlL_hrd0/check_cuda.exe -p 64 256 1
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 198
+==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
+.........................................................................
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inlL_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.460710e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.902456e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.930622e+07                 )  sec^-1
+MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
+TOTAL       :     0.592180 sec
+INFO: No Floating Point Exceptions have been reported
+     2,378,381,230      cycles                           #    2.868 GHz                    
+     3,561,554,742      instructions                     #    1.50  insn per cycle         
+       0.887503584 seconds time elapsed
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inlL_hrd0/runTest_cuda.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inlL_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inlL_hrd0/fcheck_cuda.exe 2 64 2
+Avg ME (C++/GPU)   = 1.424749e-01
+Avg ME (F77/GPU)   = 0.14247482577104625
+Relative difference = 5.209967070245855e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inlL_hrd0/check_hip.exe
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
+EvtsPerSec[Rmb+ME]     (23) = ( 1.043708e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.066157e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.066157e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     1.590033 sec
+INFO: No Floating Point Exceptions have been reported
+     4,675,200,022      cycles                           #    2.934 GHz                    
+    13,225,778,431      instructions                     #    2.83  insn per cycle         
+       1.594331237 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  281) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247482734618697
+Relative difference = 5.099411406595165e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 1.841458e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.910901e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.910901e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     0.908819 sec
+INFO: No Floating Point Exceptions have been reported
+     2,672,459,206      cycles                           #    2.930 GHz                    
+     7,553,463,438      instructions                     #    2.83  insn per cycle         
+       0.912853526 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1132) (avx2:    0) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247482734618697
+Relative difference = 5.099411406595165e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.166723e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.374383e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.374383e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     0.536965 sec
+INFO: No Floating Point Exceptions have been reported
+     1,480,693,561      cycles                           #    2.739 GHz                    
+     3,134,816,659      instructions                     #    2.12  insn per cycle         
+       0.541161557 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1751) (512y:    0) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247482643254802
+Relative difference = 5.163537715318965e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 3.550416e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.806841e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.806841e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     0.481032 sec
+INFO: No Floating Point Exceptions have been reported
+     1,330,671,275      cycles                           #    2.746 GHz                    
+     2,996,505,280      instructions                     #    2.25  insn per cycle         
+       0.485174170 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1584) (512y:  110) (512z:    0)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247482643254802
+Relative difference = 5.163537715318965e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inlL_hrd0/check_cpp.exe -p 64 256 10 OMP=
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
+Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
+FP precision                = MIXED (NaN/abnormal=0, zero=0)
+Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
+EvtsPerSec[Rmb+ME]     (23) = ( 2.228163e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.330666e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.330666e+05                 )  sec^-1
+MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
+TOTAL       :     0.757294 sec
+INFO: No Floating Point Exceptions have been reported
+     1,367,762,875      cycles                           #    1.797 GHz                    
+     1,996,489,699      instructions                     #    1.46  insn per cycle         
+       0.761516894 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1603) (512y:  108) (512z:  990)
+-------------------------------------------------------------------------
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inlL_hrd0/runTest_cpp.exe
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
+[  PASSED  ] 3 tests.
+INFO: No Floating Point Exceptions have been reported
+-------------------------------------------------------------------------
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inlL_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inlL_hrd0/fcheck_cpp.exe 2 64 2
+Avg ME (C++/C++)    = 1.424749e-01
+Avg ME (F77/C++)    = 0.14247482643254802
+Relative difference = 5.163537715318965e-07
+OK (relative difference <= 5E-3)
+=========================================================================
+
+TEST COMPLETED
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
index 83b828ef2e..5974fe5c05 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 07s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-08_20:39:39
+DATE: 2024-08-30_00:10:03
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.966123e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.101302e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.184882e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.565771e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.087963e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.183774e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     0.517997 sec
+TOTAL       :     0.527958 sec
 INFO: No Floating Point Exceptions have been reported
-     2,197,627,386      cycles                           #    2.931 GHz                    
-     3,156,596,662      instructions                     #    1.44  insn per cycle         
-       0.806377685 seconds time elapsed
+     2,183,035,709      cycles                           #    2.857 GHz                    
+     3,157,151,269      instructions                     #    1.45  insn per cycle         
+       0.821528267 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 226
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.676906e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.715525e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.715525e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.616617e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.653100e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.653100e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     6.391723 sec
+TOTAL       :     6.596281 sec
 INFO: No Floating Point Exceptions have been reported
-    19,396,886,248      cycles                           #    3.031 GHz                    
-    52,050,532,705      instructions                     #    2.68  insn per cycle         
-       6.400835825 seconds time elapsed
+    19,282,941,048      cycles                           #    2.921 GHz                    
+    51,926,972,074      instructions                     #    2.69  insn per cycle         
+       6.601945823 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  668) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.012360e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.148434e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.148434e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.899605e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.029471e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.029471e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.619594 sec
+TOTAL       :     3.728280 sec
 INFO: No Floating Point Exceptions have been reported
-    11,008,104,240      cycles                           #    3.034 GHz                    
-    30,899,851,824      instructions                     #    2.81  insn per cycle         
-       3.628709587 seconds time elapsed
+    10,887,007,673      cycles                           #    2.916 GHz                    
+    30,780,884,538      instructions                     #    2.83  insn per cycle         
+       3.733824013 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2914) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.811277e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.159957e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.159957e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.691675e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.024979e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.024979e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.317730 sec
+TOTAL       :     2.344753 sec
 INFO: No Floating Point Exceptions have been reported
-     6,603,833,232      cycles                           #    2.839 GHz                    
-    13,785,660,246      instructions                     #    2.09  insn per cycle         
-       2.326886320 seconds time elapsed
+     6,446,140,954      cycles                           #    2.744 GHz                    
+    13,661,836,237      instructions                     #    2.12  insn per cycle         
+       2.350400868 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2934) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.274677e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.701182e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.701182e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.132027e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.532809e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.532809e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.128100 sec
+TOTAL       :     2.152562 sec
 INFO: No Floating Point Exceptions have been reported
-     6,037,170,556      cycles                           #    2.826 GHz                    
-    13,124,188,246      instructions                     #    2.17  insn per cycle         
-       2.137191260 seconds time elapsed
+     5,941,792,199      cycles                           #    2.754 GHz                    
+    13,004,857,023      instructions                     #    2.19  insn per cycle         
+       2.158116058 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2660) (512y:  146) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.546906e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.734269e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.734269e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.404422e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.577036e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.577036e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.095180 sec
+TOTAL       :     3.190739 sec
 INFO: No Floating Point Exceptions have been reported
-     5,952,641,894      cycles                           #    1.919 GHz                    
-     8,707,382,958      instructions                     #    1.46  insn per cycle         
-       3.104614357 seconds time elapsed
+     5,831,326,790      cycles                           #    1.825 GHz                    
+     8,584,787,842      instructions                     #    1.47  insn per cycle         
+       3.196417922 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1494) (512y:  128) (512z: 1942)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt
index 6dfb3d97d4..adfc4bbe17 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 49s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-08_20:40:05
+DATE: 2024-08-30_00:10:30
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.936743e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.101495e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.185931e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.567664e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.091591e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.187477e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     0.520732 sec
+TOTAL       :     0.530828 sec
 INFO: No Floating Point Exceptions have been reported
-     2,199,613,002      cycles                           #    2.925 GHz                    
-     3,199,605,848      instructions                     #    1.45  insn per cycle         
-       0.808356541 seconds time elapsed
+     2,204,760,373      cycles                           #    2.868 GHz                    
+     3,152,488,657      instructions                     #    1.43  insn per cycle         
+       0.828555127 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.741086e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.782692e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.782692e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.697449e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.737720e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.737720e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     6.159994 sec
+TOTAL       :     6.286999 sec
 INFO: No Floating Point Exceptions have been reported
-    18,606,289,146      cycles                           #    3.016 GHz                    
-    50,188,372,015      instructions                     #    2.70  insn per cycle         
-       6.169438178 seconds time elapsed
+    18,353,069,323      cycles                           #    2.917 GHz                    
+    50,057,473,820      instructions                     #    2.73  insn per cycle         
+       6.292828149 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  626) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.098336e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.247173e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.247173e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.072939e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.218519e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.218519e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.523816 sec
+TOTAL       :     3.523041 sec
 INFO: No Floating Point Exceptions have been reported
-    10,442,361,179      cycles                           #    2.956 GHz                    
-    29,279,251,351      instructions                     #    2.80  insn per cycle         
-       3.532990329 seconds time elapsed
+    10,324,590,511      cycles                           #    2.927 GHz                    
+    29,157,948,833      instructions                     #    2.82  insn per cycle         
+       3.528873583 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2732) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.443138e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.746940e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.746940e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.338289e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.619702e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.619702e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.497852 sec
+TOTAL       :     2.526518 sec
 INFO: No Floating Point Exceptions have been reported
-     7,066,085,833      cycles                           #    2.820 GHz                    
-    15,266,746,500      instructions                     #    2.16  insn per cycle         
-       2.506843234 seconds time elapsed
+     6,923,035,282      cycles                           #    2.735 GHz                    
+    15,145,434,537      instructions                     #    2.19  insn per cycle         
+       2.532145959 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3014) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.619490e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.939857e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.939857e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.516990e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.824523e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.824523e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.408665 sec
+TOTAL       :     2.431225 sec
 INFO: No Floating Point Exceptions have been reported
-     6,801,023,817      cycles                           #    2.814 GHz                    
-    14,741,025,083      instructions                     #    2.17  insn per cycle         
-       2.418105582 seconds time elapsed
+     6,666,697,943      cycles                           #    2.737 GHz                    
+    14,616,405,824      instructions                     #    2.19  insn per cycle         
+       2.436846304 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2610) (512y:  302) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.467108e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.646231e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.646231e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.295253e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.454598e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.454598e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.162174 sec
+TOTAL       :     3.291723 sec
 INFO: No Floating Point Exceptions have been reported
-     6,163,693,414      cycles                           #    1.944 GHz                    
-    10,458,436,313      instructions                     #    1.70  insn per cycle         
-       3.171538437 seconds time elapsed
+     6,035,530,423      cycles                           #    1.831 GHz                    
+    10,335,265,923      instructions                     #    1.71  insn per cycle         
+       3.297372600 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1256) (512y:  214) (512z: 2129)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
index f2fae03e6f..3a770e0987 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 55s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-08_20:40:31
+DATE: 2024-08-30_00:10:56
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.265904e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.014084e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.164702e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.398478e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.000098e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.167990e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 7.154219e+00 +- 1.620281e-01 )  GeV^0
-TOTAL       :     0.479298 sec
+TOTAL       :     0.486397 sec
 INFO: No Floating Point Exceptions have been reported
-     2,081,740,099      cycles                           #    2.923 GHz                    
-     2,980,788,530      instructions                     #    1.43  insn per cycle         
-       0.769444492 seconds time elapsed
+     2,024,122,033      cycles                           #    2.853 GHz                    
+     2,916,570,176      instructions                     #    1.44  insn per cycle         
+       0.767663174 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 157
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.729175e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.771417e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.771417e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.678377e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.720094e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.720094e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.175644e+00 +- 1.658767e-01 )  GeV^0
-TOTAL       :     6.156936 sec
+TOTAL       :     6.334718 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    18,595,330,502      cycles                           #    3.018 GHz                    
-    51,251,959,778      instructions                     #    2.76  insn per cycle         
-       6.163337596 seconds time elapsed
+    18,555,321,069      cycles                           #    2.927 GHz                    
+    51,213,843,891      instructions                     #    2.76  insn per cycle         
+       6.340063735 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  625) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -113,15 +117,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.099341e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.368380e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.368380e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.997779e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.258013e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.258013e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.175642e+00 +- 1.658767e-01 )  GeV^0
-TOTAL       :     2.652061 sec
+TOTAL       :     2.708980 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     7,973,155,362      cycles                           #    3.000 GHz                    
-    19,354,832,142      instructions                     #    2.43  insn per cycle         
-       2.658432650 seconds time elapsed
+     7,936,378,928      cycles                           #    2.926 GHz                    
+    19,315,957,189      instructions                     #    2.43  insn per cycle         
+       2.714320099 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3543) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -144,15 +148,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.856741e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.854878e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.854878e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.845222e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.836028e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.836028e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     1.428829 sec
+TOTAL       :     1.422585 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     4,050,150,212      cycles                           #    2.823 GHz                    
-     8,874,617,638      instructions                     #    2.19  insn per cycle         
-       1.435345706 seconds time elapsed
+     3,940,333,189      cycles                           #    2.761 GHz                    
+     8,829,110,284      instructions                     #    2.24  insn per cycle         
+       1.427918817 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3701) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -173,15 +177,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.579308e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.783002e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.783002e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.323802e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.447444e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.447444e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     1.316483 sec
+TOTAL       :     1.345781 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     3,770,202,308      cycles                           #    2.852 GHz                    
-     8,473,429,912      instructions                     #    2.25  insn per cycle         
-       1.322971561 seconds time elapsed
+     3,723,275,845      cycles                           #    2.757 GHz                    
+     8,433,230,552      instructions                     #    2.27  insn per cycle         
+       1.351128682 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3531) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -202,15 +206,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.340113e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.941423e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.941423e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.920739e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.460739e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.460739e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     1.746808 sec
+TOTAL       :     1.857321 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     3,535,492,788      cycles                           #    2.017 GHz                    
-     6,276,858,891      instructions                     #    1.78  insn per cycle         
-       1.753255052 seconds time elapsed
+     3,505,970,146      cycles                           #    1.883 GHz                    
+     6,241,421,267      instructions                     #    1.78  insn per cycle         
+       1.862744743 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2373) (512y:   24) (512z: 2288)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt
index 0a0273143f..6fd24bcfe5 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 50s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-08_20:40:52
+DATE: 2024-08-30_00:11:18
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.367628e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.048579e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.197733e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.506429e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.027813e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.199337e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 7.154219e+00 +- 1.620281e-01 )  GeV^0
-TOTAL       :     0.477604 sec
+TOTAL       :     0.487321 sec
 INFO: No Floating Point Exceptions have been reported
-     2,076,219,464      cycles                           #    2.927 GHz                    
-     2,975,745,460      instructions                     #    1.43  insn per cycle         
-       0.766187526 seconds time elapsed
+     2,021,518,183      cycles                           #    2.846 GHz                    
+     2,916,248,031      instructions                     #    1.44  insn per cycle         
+       0.768383991 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 131
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.736285e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.779068e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.779068e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.723287e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.768803e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.768803e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.175644e+00 +- 1.658767e-01 )  GeV^0
-TOTAL       :     6.132525 sec
+TOTAL       :     6.171783 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    18,052,449,940      cycles                           #    2.941 GHz                    
-    49,636,091,735      instructions                     #    2.75  insn per cycle         
-       6.138910377 seconds time elapsed
+    18,041,103,143      cycles                           #    2.921 GHz                    
+    49,602,992,577      instructions                     #    2.75  insn per cycle         
+       6.177081980 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  613) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe
@@ -113,15 +117,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.614737e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.962775e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.962775e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.514454e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.848137e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.848137e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.175642e+00 +- 1.658767e-01 )  GeV^0
-TOTAL       :     2.366728 sec
+TOTAL       :     2.409156 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     7,117,859,932      cycles                           #    3.001 GHz                    
-    18,522,428,859      instructions                     #    2.60  insn per cycle         
-       2.373189090 seconds time elapsed
+     7,065,969,827      cycles                           #    2.927 GHz                    
+    18,480,893,076      instructions                     #    2.62  insn per cycle         
+       2.414608873 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3235) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe
@@ -144,15 +148,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.520738e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.991057e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.991057e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.355617e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.802219e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.802219e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     1.992175 sec
+TOTAL       :     2.043734 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     5,687,734,724      cycles                           #    2.847 GHz                    
-    10,882,767,796      instructions                     #    1.91  insn per cycle         
-       1.998751657 seconds time elapsed
+     5,636,114,095      cycles                           #    2.752 GHz                    
+    10,845,591,498      instructions                     #    1.92  insn per cycle         
+       2.049012424 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4260) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe
@@ -175,15 +179,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.605855e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.093953e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.093953e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.424944e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.877966e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.877966e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     1.963543 sec
+TOTAL       :     2.018741 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     5,605,481,105      cycles                           #    2.846 GHz                    
-    10,580,081,810      instructions                     #    1.89  insn per cycle         
-       1.969981859 seconds time elapsed
+     5,551,896,925      cycles                           #    2.744 GHz                    
+    10,543,438,618      instructions                     #    1.90  insn per cycle         
+       2.024091086 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4123) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe
@@ -206,15 +210,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.560324e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.865892e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.865892e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.344942e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.633588e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.633588e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     2.392840 sec
+TOTAL       :     2.500651 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     4,694,796,569      cycles                           #    1.957 GHz                    
-     8,695,099,464      instructions                     #    1.85  insn per cycle         
-       2.399389128 seconds time elapsed
+     4,636,697,064      cycles                           #    1.851 GHz                    
+     8,657,159,753      instructions                     #    1.87  insn per cycle         
+       2.506121530 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2849) (512y:    0) (512z: 2883)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
index 62d3c322fa..2259d7cf4f 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 55s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-08_20:41:15
+DATE: 2024-08-30_00:11:41
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.961744e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.101148e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.184921e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.538666e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.087660e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.184391e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     0.519363 sec
+TOTAL       :     0.527953 sec
 INFO: No Floating Point Exceptions have been reported
-     2,191,794,568      cycles                           #    2.919 GHz                    
-     3,157,238,703      instructions                     #    1.44  insn per cycle         
-       0.807852407 seconds time elapsed
+     2,183,107,430      cycles                           #    2.872 GHz                    
+     3,152,023,505      instructions                     #    1.44  insn per cycle         
+       0.817097816 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 226
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.547380e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.581051e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.581051e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.521018e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.553268e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.553268e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     6.917943 sec
+TOTAL       :     7.003787 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    20,590,059,617      cycles                           #    2.973 GHz                    
-    52,050,938,989      instructions                     #    2.53  insn per cycle         
-       6.927193752 seconds time elapsed
+    20,471,020,649      cycles                           #    2.921 GHz                    
+    51,928,444,025      instructions                     #    2.54  insn per cycle         
+       7.009468548 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  655) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe
@@ -113,15 +117,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.762310e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.879212e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.879212e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.665203e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.774786e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.774786e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.935303 sec
+TOTAL       :     4.044714 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    11,659,111,162      cycles                           #    2.956 GHz                    
-    30,715,351,599      instructions                     #    2.63  insn per cycle         
-       3.944612578 seconds time elapsed
+    11,506,047,617      cycles                           #    2.842 GHz                    
+    30,594,015,931      instructions                     #    2.66  insn per cycle         
+       4.050341042 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2970) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe
@@ -144,15 +148,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.631108e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.954751e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.954751e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.502333e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.811948e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.811948e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.401648 sec
+TOTAL       :     2.438893 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     6,824,462,536      cycles                           #    2.832 GHz                    
-    13,725,309,322      instructions                     #    2.01  insn per cycle         
-       2.410817230 seconds time elapsed
+     6,698,062,649      cycles                           #    2.741 GHz                    
+    13,603,721,480      instructions                     #    2.03  insn per cycle         
+       2.444469603 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3106) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe
@@ -175,15 +179,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.105035e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.496184e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.496184e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.940795e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.309966e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.309966e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.189054 sec
+TOTAL       :     2.232647 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     6,256,988,161      cycles                           #    2.848 GHz                    
-    13,091,196,075      instructions                     #    2.09  insn per cycle         
-       2.197929864 seconds time elapsed
+     6,140,627,215      cycles                           #    2.744 GHz                    
+    12,970,097,401      instructions                     #    2.11  insn per cycle         
+       2.238426687 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2839) (512y:  150) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe
@@ -206,15 +210,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.274756e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.429596e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.429596e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.083008e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.221921e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.221921e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.340001 sec
+TOTAL       :     3.512125 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     6,530,704,290      cycles                           #    1.951 GHz                    
-     8,820,931,604      instructions                     #    1.35  insn per cycle         
-       3.348983212 seconds time elapsed
+     6,392,916,293      cycles                           #    1.818 GHz                    
+     8,698,960,285      instructions                     #    1.36  insn per cycle         
+       3.517835253 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1769) (512y:  130) (512z: 2012)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt
index 8f692fc05c..de193defe8 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 51s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-08_20:41:42
+DATE: 2024-08-30_00:12:09
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.985439e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.104211e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.186889e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.575688e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.092997e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.188730e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     0.520398 sec
+TOTAL       :     0.531136 sec
 INFO: No Floating Point Exceptions have been reported
-     2,215,259,816      cycles                           #    2.943 GHz                    
-     3,181,112,910      instructions                     #    1.44  insn per cycle         
-       0.810106845 seconds time elapsed
+     2,208,236,175      cycles                           #    2.863 GHz                    
+     3,176,316,980      instructions                     #    1.44  insn per cycle         
+       0.829765196 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.642914e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.679857e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.679857e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.597775e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.633618e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.633618e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     6.520897 sec
+TOTAL       :     6.673665 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    19,742,813,002      cycles                           #    3.024 GHz                    
-    50,090,585,504      instructions                     #    2.54  insn per cycle         
-       6.530114912 seconds time elapsed
+    19,507,572,945      cycles                           #    2.921 GHz                    
+    49,961,631,538      instructions                     #    2.56  insn per cycle         
+       6.679180336 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  599) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe
@@ -113,15 +117,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.996801e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.132711e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.132711e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.893528e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.021968e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.021968e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.635789 sec
+TOTAL       :     3.733810 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    11,015,177,767      cycles                           #    3.023 GHz                    
-    29,218,453,275      instructions                     #    2.65  insn per cycle         
-       3.644811061 seconds time elapsed
+    10,902,882,333      cycles                           #    2.917 GHz                    
+    29,099,087,878      instructions                     #    2.67  insn per cycle         
+       3.739349782 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2806) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
@@ -144,15 +148,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.818882e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.034730e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.034730e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.711898e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.915395e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.915395e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.883629 sec
+TOTAL       :     2.935036 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     8,167,532,623      cycles                           #    2.824 GHz                    
-    15,289,290,626      instructions                     #    1.87  insn per cycle         
-       2.892785978 seconds time elapsed
+     8,019,016,096      cycles                           #    2.728 GHz                    
+    15,168,124,592      instructions                     #    1.89  insn per cycle         
+       2.940665569 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3190) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
@@ -175,15 +179,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.019354e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.261718e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.261718e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.915818e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.144411e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.144411e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.748891 sec
+TOTAL       :     2.787579 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     7,796,139,330      cycles                           #    2.827 GHz                    
-    14,598,894,712      instructions                     #    1.87  insn per cycle         
-       2.758146376 seconds time elapsed
+     7,662,521,181      cycles                           #    2.744 GHz                    
+    14,477,655,900      instructions                     #    1.89  insn per cycle         
+       2.793241788 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2762) (512y:  304) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe
@@ -206,15 +210,15 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.130478e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.273768e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.273768e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.007999e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.141501e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.141501e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.488340 sec
+TOTAL       :     3.595624 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     6,648,747,235      cycles                           #    1.902 GHz                    
-    10,013,894,735      instructions                     #    1.51  insn per cycle         
-       3.497416797 seconds time elapsed
+     6,539,430,615      cycles                           #    1.817 GHz                    
+     9,892,281,397      instructions                     #    1.51  insn per cycle         
+       3.601324479 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1542) (512y:  216) (512z: 2216)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
index ad80cd52ba..fa7b02a6a2 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 15s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-08_20:38:36
+DATE: 2024-08-30_00:09:01
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.191569e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.214197e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.217917e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.194339e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.220510e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.224134e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.458797 sec
+TOTAL       :     0.466393 sec
 INFO: No Floating Point Exceptions have been reported
-     1,983,013,526      cycles                           #    2.927 GHz                    
-     2,898,600,678      instructions                     #    1.46  insn per cycle         
-       0.735167670 seconds time elapsed
+     1,946,789,529      cycles                           #    2.848 GHz                    
+     2,840,228,076      instructions                     #    1.46  insn per cycle         
+       0.743222441 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.853741e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.992878e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.001850e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.849118e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.996884e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.006314e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.048215e-03 +- 4.042405e-03 )  GeV^-4
-TOTAL       :     0.478795 sec
+TOTAL       :     0.482333 sec
 INFO: No Floating Point Exceptions have been reported
-     2,032,935,359      cycles                           #    2.895 GHz                    
-     3,002,750,539      instructions                     #    1.48  insn per cycle         
-       0.759651454 seconds time elapsed
+     2,040,122,455      cycles                           #    2.862 GHz                    
+     3,026,821,422      instructions                     #    1.48  insn per cycle         
+       0.771290598 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.535539e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.539012e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.539012e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.401682e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.405003e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.405003e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.151546 sec
+TOTAL       :     0.157394 sec
 INFO: No Floating Point Exceptions have been reported
-       468,124,472      cycles                           #    3.026 GHz                    
-     1,389,955,355      instructions                     #    2.97  insn per cycle         
-       0.155210727 seconds time elapsed
+       467,966,696      cycles                           #    2.914 GHz                    
+     1,389,665,974      instructions                     #    2.97  insn per cycle         
+       0.161243043 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.637495e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.649053e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.649053e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.393464e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.405879e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.405879e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.081392 sec
+TOTAL       :     0.084547 sec
 INFO: No Floating Point Exceptions have been reported
-       240,371,597      cycles                           #    2.843 GHz                    
-       693,129,674      instructions                     #    2.88  insn per cycle         
-       0.085091876 seconds time elapsed
+       240,797,416      cycles                           #    2.743 GHz                    
+       692,952,810      instructions                     #    2.88  insn per cycle         
+       0.088416716 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 9483) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.470591e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.476735e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.476735e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.416012e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.421968e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.421968e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.038239 sec
+TOTAL       :     0.039303 sec
 INFO: No Floating Point Exceptions have been reported
-       114,892,967      cycles                           #    2.759 GHz                    
-       258,045,984      instructions                     #    2.25  insn per cycle         
-       0.042251807 seconds time elapsed
+       114,539,539      cycles                           #    2.685 GHz                    
+       257,865,602      instructions                     #    2.25  insn per cycle         
+       0.043139255 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8496) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.699002e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.707705e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.707705e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.621088e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.629157e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.629157e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.033054 sec
+TOTAL       :     0.034613 sec
 INFO: No Floating Point Exceptions have been reported
-       102,370,235      cycles                           #    2.829 GHz                    
-       240,205,792      instructions                     #    2.35  insn per cycle         
-       0.036714327 seconds time elapsed
+       102,700,525      cycles                           #    2.709 GHz                    
+       239,920,525      instructions                     #    2.34  insn per cycle         
+       0.038479326 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8133) (512y:  150) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.284659e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.290558e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.290558e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.200711e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.205895e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.205895e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.043329 sec
+TOTAL       :     0.046254 sec
 INFO: No Floating Point Exceptions have been reported
-        89,664,319      cycles                           #    1.930 GHz                    
-       134,445,525      instructions                     #    1.50  insn per cycle         
-       0.047102954 seconds time elapsed
+        90,180,411      cycles                           #    1.814 GHz                    
+       134,242,334      instructions                     #    1.49  insn per cycle         
+       0.050311483 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1931) (512y:  126) (512z: 7089)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt
index ce829c6200..a8b8f29f83 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 56s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-08_20:38:46
+DATE: 2024-08-30_00:09:11
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.249020e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.272842e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.276725e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.239289e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.264501e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.268217e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.461905 sec
+TOTAL       :     0.464414 sec
 INFO: No Floating Point Exceptions have been reported
-     2,018,577,231      cycles                           #    2.927 GHz                    
-     2,882,435,680      instructions                     #    1.43  insn per cycle         
-       0.748301491 seconds time elapsed
+     1,976,573,019      cycles                           #    2.857 GHz                    
+     2,840,849,000      instructions                     #    1.44  insn per cycle         
+       0.748390902 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.955136e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.095621e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.108051e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.945199e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.094050e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.103648e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.048215e-03 +- 4.042405e-03 )  GeV^-4
-TOTAL       :     0.478584 sec
+TOTAL       :     0.483417 sec
 INFO: No Floating Point Exceptions have been reported
-     2,069,849,202      cycles                           #    2.946 GHz                    
-     3,022,582,128      instructions                     #    1.46  insn per cycle         
-       0.760103886 seconds time elapsed
+     2,042,844,461      cycles                           #    2.868 GHz                    
+     3,015,415,868      instructions                     #    1.48  insn per cycle         
+       0.770568338 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.498608e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.502028e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.502028e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.312575e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.315809e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.315809e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.152353 sec
+TOTAL       :     0.160867 sec
 INFO: No Floating Point Exceptions have been reported
-       465,735,866      cycles                           #    2.994 GHz                    
-     1,385,207,858      instructions                     #    2.97  insn per cycle         
-       0.156142730 seconds time elapsed
+       466,639,598      cycles                           #    2.839 GHz                    
+     1,385,046,855      instructions                     #    2.97  insn per cycle         
+       0.164971440 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.699480e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.712661e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.712661e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.419167e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.432939e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.432939e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.080022 sec
+TOTAL       :     0.083367 sec
 INFO: No Floating Point Exceptions have been reported
-       238,839,052      cycles                           #    2.875 GHz                    
-       689,228,820      instructions                     #    2.89  insn per cycle         
-       0.083649102 seconds time elapsed
+       239,228,784      cycles                           #    2.761 GHz                    
+       689,068,045      instructions                     #    2.88  insn per cycle         
+       0.087228912 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 9528) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.515936e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.522249e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.522249e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.403646e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.409530e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.409530e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.036065 sec
+TOTAL       :     0.038935 sec
 INFO: No Floating Point Exceptions have been reported
-       111,582,476      cycles                           #    2.848 GHz                    
-       253,551,951      instructions                     #    2.27  insn per cycle         
-       0.039739897 seconds time elapsed
+       112,606,036      cycles                           #    2.660 GHz                    
+       253,498,964      instructions                     #    2.25  insn per cycle         
+       0.042949663 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8451) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.680034e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.687653e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.687653e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.592122e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.599776e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.599776e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.032732 sec
+TOTAL       :     0.034497 sec
 INFO: No Floating Point Exceptions have been reported
-       100,255,842      cycles                           #    2.793 GHz                    
-       235,731,789      instructions                     #    2.35  insn per cycle         
-       0.036414093 seconds time elapsed
+       100,939,846      cycles                           #    2.666 GHz                    
+       235,610,346      instructions                     #    2.33  insn per cycle         
+       0.038438765 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8091) (512y:  150) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.271489e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.276895e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.276895e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.201717e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.206827e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.206827e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.042973 sec
+TOTAL       :     0.045438 sec
 INFO: No Floating Point Exceptions have been reported
-        87,728,536      cycles                           #    1.900 GHz                    
-       129,884,935      instructions                     #    1.48  insn per cycle         
-       0.046739732 seconds time elapsed
+        88,129,895      cycles                           #    1.803 GHz                    
+       129,668,800      instructions                     #    1.47  insn per cycle         
+       0.049473921 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1887) (512y:  126) (512z: 7093)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
index 3f66e78e98..b30fdb0a04 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 01s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-08_20:38:57
+DATE: 2024-08-30_00:09:22
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.450134e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.460503e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.463108e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.439642e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.450633e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.453577e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.188141e-04 +- 6.565202e-04 )  GeV^-4
-TOTAL       :     0.461786 sec
+TOTAL       :     0.468424 sec
 INFO: No Floating Point Exceptions have been reported
-     1,983,576,716      cycles                           #    2.936 GHz                    
-     2,917,710,082      instructions                     #    1.47  insn per cycle         
-       0.732112148 seconds time elapsed
+     1,979,854,233      cycles                           #    2.856 GHz                    
+     2,835,931,601      instructions                     #    1.43  insn per cycle         
+       0.750354883 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.144453e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.248650e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.259538e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.136161e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.244237e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.264717e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.020494e-03 +- 4.025605e-03 )  GeV^-4
-TOTAL       :     0.468413 sec
+TOTAL       :     0.471781 sec
 INFO: No Floating Point Exceptions have been reported
-     2,017,794,611      cycles                           #    2.933 GHz                    
-     2,930,677,889      instructions                     #    1.45  insn per cycle         
-       0.746841147 seconds time elapsed
+     1,955,602,473      cycles                           #    2.852 GHz                    
+     2,851,428,736      instructions                     #    1.46  insn per cycle         
+       0.744473958 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.555756e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.559328e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.559328e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.431844e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.435246e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.435246e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.177153e-04 +- 6.554185e-04 )  GeV^-4
-TOTAL       :     0.150880 sec
+TOTAL       :     0.156051 sec
 INFO: No Floating Point Exceptions have been reported
-       463,646,900      cycles                           #    3.010 GHz                    
-     1,382,054,083      instructions                     #    2.98  insn per cycle         
-       0.154571759 seconds time elapsed
+       464,275,904      cycles                           #    2.913 GHz                    
+     1,381,903,168      instructions                     #    2.98  insn per cycle         
+       0.159971759 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.231675e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.235936e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.235936e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.210164e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.214685e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.214685e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.177152e-04 +- 6.554185e-04 )  GeV^-4
-TOTAL       :     0.044706 sec
+TOTAL       :     0.045622 sec
 INFO: No Floating Point Exceptions have been reported
-       132,862,579      cycles                           #    2.773 GHz                    
-       372,176,524      instructions                     #    2.80  insn per cycle         
-       0.048442327 seconds time elapsed
+       133,102,979      cycles                           #    2.717 GHz                    
+       372,002,572      instructions                     #    2.79  insn per cycle         
+       0.049577231 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:10140) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.891678e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.915961e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.915961e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.685154e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.706602e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.706602e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165746e-04 +- 6.542823e-04 )  GeV^-4
-TOTAL       :     0.020296 sec
+TOTAL       :     0.021709 sec
 INFO: No Floating Point Exceptions have been reported
-        65,005,087      cycles                           #    2.776 GHz                    
-       142,918,773      instructions                     #    2.20  insn per cycle         
-       0.023971535 seconds time elapsed
+        65,573,769      cycles                           #    2.621 GHz                    
+       142,812,899      instructions                     #    2.18  insn per cycle         
+       0.025544258 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 9237) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.201047e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.231393e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.231393e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.090980e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.120143e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.120143e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165746e-04 +- 6.542823e-04 )  GeV^-4
-TOTAL       :     0.018450 sec
+TOTAL       :     0.019209 sec
 INFO: No Floating Point Exceptions have been reported
-        59,790,078      cycles                           #    2.765 GHz                    
-       132,888,839      instructions                     #    2.22  insn per cycle         
-       0.022153075 seconds time elapsed
+        60,449,624      cycles                           #    2.693 GHz                    
+       132,675,328      instructions                     #    2.19  insn per cycle         
+       0.023011897 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8951) (512y:   28) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.264475e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.284066e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.284066e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.349292e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.369797e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.369797e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165747e-04 +- 6.542824e-04 )  GeV^-4
-TOTAL       :     0.025826 sec
+TOTAL       :     0.024693 sec
 INFO: No Floating Point Exceptions have been reported
-        53,398,285      cycles                           #    1.814 GHz                    
-        80,038,410      instructions                     #    1.50  insn per cycle         
-       0.029948894 seconds time elapsed
+        52,590,410      cycles                           #    1.867 GHz                    
+        79,499,137      instructions                     #    1.51  insn per cycle         
+       0.028818231 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2813) (512y:   32) (512z: 7440)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt
index c0ec66c0e5..7be61f3fb7 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 55s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-08_20:39:07
+DATE: 2024-08-30_00:09:32
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.475468e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.488915e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.493523e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.480611e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.492183e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.494882e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.188141e-04 +- 6.565202e-04 )  GeV^-4
-TOTAL       :     0.466666 sec
+TOTAL       :     0.466999 sec
 INFO: No Floating Point Exceptions have been reported
-     2,035,784,320      cycles                           #    2.932 GHz                    
-     2,916,651,120      instructions                     #    1.43  insn per cycle         
-       0.752059618 seconds time elapsed
+     1,994,032,599      cycles                           #    2.869 GHz                    
+     2,904,635,002      instructions                     #    1.46  insn per cycle         
+       0.752109461 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.233883e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.341900e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.353294e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.230787e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.343689e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.354925e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.020496e-03 +- 4.025606e-03 )  GeV^-4
-TOTAL       :     0.467271 sec
+TOTAL       :     0.468535 sec
 INFO: No Floating Point Exceptions have been reported
-     2,037,159,179      cycles                           #    2.946 GHz                    
-     2,882,523,885      instructions                     #    1.41  insn per cycle         
-       0.747816184 seconds time elapsed
+     1,974,074,352      cycles                           #    2.858 GHz                    
+     2,854,228,340      instructions                     #    1.45  insn per cycle         
+       0.747765066 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.551604e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.554949e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.554949e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.428329e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.431990e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.431990e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.177153e-04 +- 6.554185e-04 )  GeV^-4
-TOTAL       :     0.149984 sec
+TOTAL       :     0.155328 sec
 INFO: No Floating Point Exceptions have been reported
-       461,532,447      cycles                           #    3.013 GHz                    
-     1,376,849,888      instructions                     #    2.98  insn per cycle         
-       0.153697004 seconds time elapsed
+       461,794,147      cycles                           #    2.912 GHz                    
+     1,376,816,114      instructions                     #    2.98  insn per cycle         
+       0.159149031 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.248118e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.252450e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.252450e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.207284e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.211683e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.211683e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.177152e-04 +- 6.554185e-04 )  GeV^-4
-TOTAL       :     0.043499 sec
+TOTAL       :     0.044898 sec
 INFO: No Floating Point Exceptions have been reported
-       130,431,744      cycles                           #    2.801 GHz                    
-       367,402,317      instructions                     #    2.82  insn per cycle         
-       0.047010449 seconds time elapsed
+       130,815,214      cycles                           #    2.714 GHz                    
+       367,168,421      instructions                     #    2.81  insn per cycle         
+       0.048774302 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:10123) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.883527e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.907714e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.907714e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.693057e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.717106e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.717106e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165746e-04 +- 6.542823e-04 )  GeV^-4
-TOTAL       :     0.019514 sec
+TOTAL       :     0.020927 sec
 INFO: No Floating Point Exceptions have been reported
-        62,991,896      cycles                           #    2.777 GHz                    
-       138,167,276      instructions                     #    2.19  insn per cycle         
-       0.023246200 seconds time elapsed
+        63,720,009      cycles                           #    2.625 GHz                    
+       137,963,649      instructions                     #    2.17  insn per cycle         
+       0.024841609 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 9191) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.044826e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.071557e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.071557e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.053369e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.084432e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.084432e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165746e-04 +- 6.542823e-04 )  GeV^-4
-TOTAL       :     0.018654 sec
+TOTAL       :     0.018533 sec
 INFO: No Floating Point Exceptions have been reported
-        57,917,940      cycles                           #    2.662 GHz                    
-       128,096,344      instructions                     #    2.21  insn per cycle         
-       0.022204337 seconds time elapsed
+        58,283,780      cycles                           #    2.668 GHz                    
+       127,986,844      instructions                     #    2.20  insn per cycle         
+       0.022433226 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8907) (512y:   28) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.471457e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.494959e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.494959e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.325306e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.346291e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.346291e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165747e-04 +- 6.542824e-04 )  GeV^-4
-TOTAL       :     0.022784 sec
+TOTAL       :     0.024145 sec
 INFO: No Floating Point Exceptions have been reported
-        50,131,984      cycles                           #    1.927 GHz                    
-        74,930,459      instructions                     #    1.49  insn per cycle         
-       0.026643138 seconds time elapsed
+        50,396,566      cycles                           #    1.827 GHz                    
+        74,723,558      instructions                     #    1.48  insn per cycle         
+       0.028196285 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2768) (512y:   32) (512z: 7442)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
index a1cf964e05..56a82d3822 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 04s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-08_20:39:18
+DATE: 2024-08-30_00:09:42
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.170281e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.193514e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.197230e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.176803e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.200380e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.203980e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.460249 sec
+TOTAL       :     0.463813 sec
 INFO: No Floating Point Exceptions have been reported
-     1,998,727,826      cycles                           #    2.929 GHz                    
-     2,887,597,557      instructions                     #    1.44  insn per cycle         
-       0.739044353 seconds time elapsed
+     1,994,126,897      cycles                           #    2.870 GHz                    
+     2,883,414,328      instructions                     #    1.45  insn per cycle         
+       0.751549273 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.840436e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.977655e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.986488e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.815791e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.963506e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.977197e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.048215e-03 +- 4.042405e-03 )  GeV^-4
-TOTAL       :     0.480871 sec
+TOTAL       :     0.480482 sec
 INFO: No Floating Point Exceptions have been reported
-     2,091,938,823      cycles                           #    2.936 GHz                    
-     3,079,530,757      instructions                     #    1.47  insn per cycle         
-       0.770600295 seconds time elapsed
+     2,017,162,366      cycles                           #    2.863 GHz                    
+     2,881,486,208      instructions                     #    1.43  insn per cycle         
+       0.761315241 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.326264e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.329481e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.329481e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.299636e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.302850e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.302850e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.161027 sec
+TOTAL       :     0.162201 sec
 INFO: No Floating Point Exceptions have been reported
-       471,923,848      cycles                           #    2.871 GHz                    
-     1,398,593,986      instructions                     #    2.96  insn per cycle         
-       0.164917375 seconds time elapsed
+       473,962,179      cycles                           #    2.864 GHz                    
+     1,398,561,506      instructions                     #    2.95  insn per cycle         
+       0.166136124 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.833451e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.846029e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.846029e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.462459e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.474456e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.474456e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.079301 sec
+TOTAL       :     0.083663 sec
 INFO: No Floating Point Exceptions have been reported
-       236,478,249      cycles                           #    2.865 GHz                    
-       688,183,765      instructions                     #    2.91  insn per cycle         
-       0.083009452 seconds time elapsed
+       238,006,925      cycles                           #    2.737 GHz                    
+       688,241,004      instructions                     #    2.89  insn per cycle         
+       0.087669570 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 9327) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.464519e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.470938e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.470938e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.408225e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.414801e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.414801e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.038027 sec
+TOTAL       :     0.039583 sec
 INFO: No Floating Point Exceptions have been reported
-       113,380,965      cycles                           #    2.745 GHz                    
-       253,222,188      instructions                     #    2.23  insn per cycle         
-       0.041829832 seconds time elapsed
+       113,789,328      cycles                           #    2.650 GHz                    
+       253,028,199      instructions                     #    2.22  insn per cycle         
+       0.043523208 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8351) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.697656e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.705927e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.705927e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.638322e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.646577e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.646577e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.033099 sec
+TOTAL       :     0.034260 sec
 INFO: No Floating Point Exceptions have been reported
-       100,842,922      cycles                           #    2.776 GHz                    
-       233,742,979      instructions                     #    2.32  insn per cycle         
-       0.036790218 seconds time elapsed
+       101,355,150      cycles                           #    2.697 GHz                    
+       233,580,541      instructions                     #    2.30  insn per cycle         
+       0.038214462 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 7489) (512y:  146) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.224753e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.229606e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.229606e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.177279e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.182707e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.182707e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.045294 sec
+TOTAL       :     0.047055 sec
 INFO: No Floating Point Exceptions have been reported
-        90,903,043      cycles                           #    1.874 GHz                    
-       133,303,472      instructions                     #    1.47  insn per cycle         
-       0.049138947 seconds time elapsed
+        91,140,824      cycles                           #    1.806 GHz                    
+       133,104,919      instructions                     #    1.46  insn per cycle         
+       0.051102614 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2061) (512y:  122) (512z: 6355)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt
index e66260167e..f8c6c4c9fb 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 55s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-08_20:39:28
+DATE: 2024-08-30_00:09:53
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.209121e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.235715e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.239868e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.215311e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.239767e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.243700e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.460488 sec
+TOTAL       :     0.461734 sec
 INFO: No Floating Point Exceptions have been reported
-     1,999,748,612      cycles                           #    2.928 GHz                    
-     2,930,247,263      instructions                     #    1.47  insn per cycle         
-       0.740595703 seconds time elapsed
+     1,988,470,447      cycles                           #    2.864 GHz                    
+     2,882,361,412      instructions                     #    1.45  insn per cycle         
+       0.750779396 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -67,15 +71,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.929472e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.072806e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.082157e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.928262e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.076946e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.087118e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.048215e-03 +- 4.042405e-03 )  GeV^-4
-TOTAL       :     0.482161 sec
+TOTAL       :     0.480694 sec
 INFO: No Floating Point Exceptions have been reported
-     2,061,793,455      cycles                           #    2.911 GHz                    
-     3,015,555,211      instructions                     #    1.46  insn per cycle         
-       0.766758571 seconds time elapsed
+     2,042,603,092      cycles                           #    2.854 GHz                    
+     2,995,512,508      instructions                     #    1.47  insn per cycle         
+       0.772035435 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -97,15 +101,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.493942e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.497215e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.497215e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.350374e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.354102e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.354102e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.152521 sec
+TOTAL       :     0.158989 sec
 INFO: No Floating Point Exceptions have been reported
-       469,652,977      cycles                           #    3.017 GHz                    
-     1,393,890,707      instructions                     #    2.97  insn per cycle         
-       0.156209215 seconds time elapsed
+       469,990,230      cycles                           #    2.882 GHz                    
+     1,393,593,071      instructions                     #    2.97  insn per cycle         
+       0.163678021 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe
@@ -126,15 +130,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.875866e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.888668e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.888668e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.649152e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.662415e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.662415e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.077991 sec
+TOTAL       :     0.080671 sec
 INFO: No Floating Point Exceptions have been reported
-       235,131,903      cycles                           #    2.896 GHz                    
-       684,356,235      instructions                     #    2.91  insn per cycle         
-       0.081716900 seconds time elapsed
+       235,246,937      cycles                           #    2.800 GHz                    
+       684,070,397      instructions                     #    2.91  insn per cycle         
+       0.084583044 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 9360) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
@@ -155,15 +159,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.472431e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.478529e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.478529e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.433240e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.439350e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.439350e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.037179 sec
+TOTAL       :     0.038091 sec
 INFO: No Floating Point Exceptions have been reported
-       111,325,082      cycles                           #    2.760 GHz                    
-       248,775,647      instructions                     #    2.23  insn per cycle         
-       0.040876097 seconds time elapsed
+       111,471,077      cycles                           #    2.695 GHz                    
+       248,563,445      instructions                     #    2.23  insn per cycle         
+       0.041951935 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8304) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
@@ -184,15 +188,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.697458e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.705090e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.705090e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.616149e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.623778e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.623778e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.032417 sec
+TOTAL       :     0.033986 sec
 INFO: No Floating Point Exceptions have been reported
-        98,963,466      cycles                           #    2.782 GHz                    
-       229,303,120      instructions                     #    2.32  insn per cycle         
-       0.036104618 seconds time elapsed
+        99,756,711      cycles                           #    2.656 GHz                    
+       229,195,549      instructions                     #    2.30  insn per cycle         
+       0.038182932 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 7440) (512y:  146) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe
@@ -213,15 +217,15 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.256457e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.261478e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.261478e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.146301e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.151526e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.151526e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.043443 sec
+TOTAL       :     0.047514 sec
 INFO: No Floating Point Exceptions have been reported
-        88,868,110      cycles                           #    1.900 GHz                    
-       128,801,312      instructions                     #    1.45  insn per cycle         
-       0.047318950 seconds time elapsed
+        89,067,537      cycles                           #    1.748 GHz                    
+       128,503,438      instructions                     #    1.44  insn per cycle         
+       0.051548715 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2012) (512y:  122) (512z: 6355)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
index ef58048b29..8cc24156a7 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 17s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-08_20:37:25
+DATE: 2024-08-30_00:07:50
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.665934e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.063349e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.406343e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.656669e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.778365e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.356760e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.506392 sec
+TOTAL       :     0.518235 sec
 INFO: No Floating Point Exceptions have been reported
-     2,172,824,039      cycles                           #    2.952 GHz                    
-     3,090,027,466      instructions                     #    1.42  insn per cycle         
-       0.793282296 seconds time elapsed
+     2,154,268,990      cycles                           #    2.871 GHz                    
+     3,045,863,111      instructions                     #    1.41  insn per cycle         
+       0.808861715 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 132
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.134117e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.048218e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.048218e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.039451e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.032202e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.032202e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     1.290483 sec
+TOTAL       :     1.272909 sec
 INFO: No Floating Point Exceptions have been reported
-     3,847,248,044      cycles                           #    2.962 GHz                    
-     9,842,303,730      instructions                     #    2.56  insn per cycle         
-       1.299592545 seconds time elapsed
+     3,733,770,917      cycles                           #    2.921 GHz                    
+     9,720,651,198      instructions                     #    2.60  insn per cycle         
+       1.278835422 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  338) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.531336e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.978158e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.978158e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.503983e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.929451e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.929451e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.826770 sec
+TOTAL       :     0.811462 sec
 INFO: No Floating Point Exceptions have been reported
-     2,453,692,398      cycles                           #    2.938 GHz                    
-     6,052,098,536      instructions                     #    2.47  insn per cycle         
-       0.835919362 seconds time elapsed
+     2,332,671,266      cycles                           #    2.856 GHz                    
+     5,927,947,879      instructions                     #    2.54  insn per cycle         
+       0.817546181 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1376) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.266889e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.345995e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.345995e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.225656e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.249963e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.249963e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.606570 sec
+TOTAL       :     0.584802 sec
 INFO: No Floating Point Exceptions have been reported
-     1,785,899,086      cycles                           #    2.902 GHz                    
-     3,437,083,551      instructions                     #    1.92  insn per cycle         
-       0.616030368 seconds time elapsed
+     1,652,529,819      cycles                           #    2.801 GHz                    
+     3,311,190,823      instructions                     #    2.00  insn per cycle         
+       0.590668347 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1492) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.357485e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.522198e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.522198e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.303666e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.389768e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.389768e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.586533 sec
+TOTAL       :     0.568352 sec
 INFO: No Floating Point Exceptions have been reported
-     1,741,529,265      cycles                           #    2.926 GHz                    
-     3,407,397,649      instructions                     #    1.96  insn per cycle         
-       0.595838672 seconds time elapsed
+     1,605,893,876      cycles                           #    2.800 GHz                    
+     3,280,964,739      instructions                     #    2.04  insn per cycle         
+       0.574230340 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1368) (512y:   96) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.227600e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.220282e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.220282e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.105549e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.103411e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.103411e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.613174 sec
+TOTAL       :     0.611625 sec
 INFO: No Floating Point Exceptions have been reported
-     1,478,751,325      cycles                           #    2.377 GHz                    
-     2,546,932,482      instructions                     #    1.72  insn per cycle         
-       0.622601431 seconds time elapsed
+     1,388,086,070      cycles                           #    2.251 GHz                    
+     2,420,953,576      instructions                     #    1.74  insn per cycle         
+       0.617361867 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  568) (512y:   60) (512z: 1020)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt
index 8c70303d63..10a294cce6 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 53s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-08_20:37:37
+DATE: 2024-08-30_00:08:02
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.814897e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.661637e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.796070e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.790264e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.282286e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.700064e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.507946 sec
+TOTAL       :     0.519308 sec
 INFO: No Floating Point Exceptions have been reported
-     2,214,460,924      cycles                           #    2.958 GHz                    
-     3,109,800,964      instructions                     #    1.40  insn per cycle         
-       0.807528636 seconds time elapsed
+     2,148,852,822      cycles                           #    2.856 GHz                    
+     3,052,949,716      instructions                     #    1.42  insn per cycle         
+       0.810262458 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.340535e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.067339e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.067339e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.116112e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.042418e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.042418e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     1.264960 sec
+TOTAL       :     1.261174 sec
 INFO: No Floating Point Exceptions have been reported
-     3,833,057,387      cycles                           #    3.009 GHz                    
-     9,733,259,839      instructions                     #    2.54  insn per cycle         
-       1.274559461 seconds time elapsed
+     3,716,003,421      cycles                           #    2.935 GHz                    
+     9,602,402,440      instructions                     #    2.58  insn per cycle         
+       1.266900529 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  356) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.542135e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.989720e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.989720e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.438745e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.829743e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.829743e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.822438 sec
+TOTAL       :     0.843334 sec
 INFO: No Floating Point Exceptions have been reported
-     2,444,623,828      cycles                           #    2.942 GHz                    
-     6,004,739,844      instructions                     #    2.46  insn per cycle         
-       0.831745892 seconds time elapsed
+     2,333,178,489      cycles                           #    2.750 GHz                    
+     5,873,605,419      instructions                     #    2.52  insn per cycle         
+       0.849150055 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1342) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.232544e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.257016e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.257016e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.237965e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.263926e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.263926e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.613019 sec
+TOTAL       :     0.580296 sec
 INFO: No Floating Point Exceptions have been reported
-     1,777,339,853      cycles                           #    2.859 GHz                    
-     3,416,813,174      instructions                     #    1.92  insn per cycle         
-       0.622385987 seconds time elapsed
+     1,642,724,354      cycles                           #    2.807 GHz                    
+     3,283,919,836      instructions                     #    2.00  insn per cycle         
+       0.585984683 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.366185e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.542246e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.542246e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.312622e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.424487e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.424487e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.584170 sec
+TOTAL       :     0.567544 sec
 INFO: No Floating Point Exceptions have been reported
-     1,729,011,734      cycles                           #    2.917 GHz                    
-     3,386,515,960      instructions                     #    1.96  insn per cycle         
-       0.593372914 seconds time elapsed
+     1,607,071,100      cycles                           #    2.807 GHz                    
+     3,257,865,615      instructions                     #    2.03  insn per cycle         
+       0.573398911 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1321) (512y:   96) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.212793e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.204561e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.204561e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.175885e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.130445e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.130445e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.617575 sec
+TOTAL       :     0.594395 sec
 INFO: No Floating Point Exceptions have been reported
-     1,500,885,532      cycles                           #    2.396 GHz                    
-     2,536,856,422      instructions                     #    1.69  insn per cycle         
-       0.627161657 seconds time elapsed
+     1,365,120,671      cycles                           #    2.277 GHz                    
+     2,405,716,159      instructions                     #    1.76  insn per cycle         
+       0.600141906 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  535) (512y:   60) (512z: 1006)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
index 854849f5b9..bba9b80c38 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 02s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-08_20:37:49
+DATE: 2024-08-30_00:08:14
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.471582e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.082860e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.730798e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.266123e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.010227e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.720164e+09                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486732e-01 +- 3.293572e-05 )  GeV^0
-TOTAL       :     0.477544 sec
+TOTAL       :     0.480550 sec
 INFO: No Floating Point Exceptions have been reported
-     2,060,886,859      cycles                           #    2.928 GHz                    
-     2,892,344,882      instructions                     #    1.40  insn per cycle         
-       0.762313323 seconds time elapsed
+     2,017,865,131      cycles                           #    2.862 GHz                    
+     2,870,566,692      instructions                     #    1.42  insn per cycle         
+       0.762778204 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 100
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.384427e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.077691e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.077691e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.059132e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.039599e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.039599e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293563e-05 )  GeV^0
-TOTAL       :     1.212857 sec
+TOTAL       :     1.247370 sec
 INFO: No Floating Point Exceptions have been reported
-     3,671,434,294      cycles                           #    3.013 GHz                    
-     9,632,126,320      instructions                     #    2.62  insn per cycle         
-       1.219246655 seconds time elapsed
+     3,640,038,117      cycles                           #    2.907 GHz                    
+     9,595,863,482      instructions                     #    2.64  insn per cycle         
+       1.252756564 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  462) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.313604e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.570590e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.570590e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.211831e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.341257e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.341257e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293563e-05 )  GeV^0
-TOTAL       :     0.557914 sec
+TOTAL       :     0.563087 sec
 INFO: No Floating Point Exceptions have been reported
-     1,698,515,028      cycles                           #    3.014 GHz                    
-     3,997,527,782      instructions                     #    2.35  insn per cycle         
-       0.564171143 seconds time elapsed
+     1,628,390,678      cycles                           #    2.868 GHz                    
+     3,963,449,124      instructions                     #    2.43  insn per cycle         
+       0.568604349 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1578) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.069297e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.474961e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.474961e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.010082e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.324116e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.324116e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293562e-05 )  GeV^0
-TOTAL       :     0.435063 sec
+TOTAL       :     0.436315 sec
 INFO: No Floating Point Exceptions have been reported
-     1,286,599,575      cycles                           #    2.919 GHz                    
-     2,528,332,939      instructions                     #    1.97  insn per cycle         
-       0.441354656 seconds time elapsed
+     1,251,368,298      cycles                           #    2.837 GHz                    
+     2,493,850,083      instructions                     #    1.99  insn per cycle         
+       0.441849585 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1910) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.180191e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.819453e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.819453e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.127180e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.591382e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.591382e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293562e-05 )  GeV^0
-TOTAL       :     0.425326 sec
+TOTAL       :     0.424520 sec
 INFO: No Floating Point Exceptions have been reported
-     1,261,525,072      cycles                           #    2.926 GHz                    
-     2,504,983,030      instructions                     #    1.99  insn per cycle         
-       0.431704777 seconds time elapsed
+     1,217,161,488      cycles                           #    2.835 GHz                    
+     2,468,132,372      instructions                     #    2.03  insn per cycle         
+       0.430034667 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1855) (512y:    1) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.850782e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.787254e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.787254e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.904252e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.911507e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.911507e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293561e-05 )  GeV^0
-TOTAL       :     0.464725 sec
+TOTAL       :     0.451099 sec
 INFO: No Floating Point Exceptions have been reported
-     1,108,955,129      cycles                           #    2.357 GHz                    
-     2,107,952,878      instructions                     #    1.90  insn per cycle         
-       0.471172185 seconds time elapsed
+     1,074,595,880      cycles                           #    2.357 GHz                    
+     2,071,376,740      instructions                     #    1.93  insn per cycle         
+       0.456603914 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1039) (512y:    5) (512z: 1290)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt
index 24f2cc254b..d10e1afaa5 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 53s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-08_20:38:01
+DATE: 2024-08-30_00:08:25
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.481519e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.098490e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.734508e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.263978e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.009055e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.720229e+09                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486732e-01 +- 3.293572e-05 )  GeV^0
-TOTAL       :     0.480270 sec
+TOTAL       :     0.484875 sec
 INFO: No Floating Point Exceptions have been reported
-     2,041,258,883      cycles                           #    2.865 GHz                    
-     2,919,368,257      instructions                     #    1.43  insn per cycle         
-       0.770727877 seconds time elapsed
+     2,018,138,819      cycles                           #    2.844 GHz                    
+     2,860,124,556      instructions                     #    1.42  insn per cycle         
+       0.768540832 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 93
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.423477e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.084213e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.084213e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.165432e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.053698e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.053698e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293563e-05 )  GeV^0
-TOTAL       :     1.208276 sec
+TOTAL       :     1.231345 sec
 INFO: No Floating Point Exceptions have been reported
-     3,647,443,455      cycles                           #    3.005 GHz                    
-     9,504,212,055      instructions                     #    2.61  insn per cycle         
-       1.214581993 seconds time elapsed
+     3,613,467,441      cycles                           #    2.924 GHz                    
+     9,465,330,613      instructions                     #    2.62  insn per cycle         
+       1.236653003 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  366) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.204450e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.296384e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.296384e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.209580e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.334762e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.334762e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293563e-05 )  GeV^0
-TOTAL       :     0.572123 sec
+TOTAL       :     0.562802 sec
 INFO: No Floating Point Exceptions have been reported
-     1,666,311,430      cycles                           #    2.883 GHz                    
-     3,968,199,942      instructions                     #    2.38  insn per cycle         
-       0.578517715 seconds time elapsed
+     1,626,298,538      cycles                           #    2.865 GHz                    
+     3,929,468,943      instructions                     #    2.42  insn per cycle         
+       0.568304538 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1516) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.086457e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.476966e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.476966e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.014044e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.299344e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.299344e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293562e-05 )  GeV^0
-TOTAL       :     0.433372 sec
+TOTAL       :     0.435235 sec
 INFO: No Floating Point Exceptions have been reported
-     1,287,648,503      cycles                           #    2.933 GHz                    
-     2,519,527,968      instructions                     #    1.96  insn per cycle         
-       0.439715000 seconds time elapsed
+     1,243,896,160      cycles                           #    2.827 GHz                    
+     2,478,597,676      instructions                     #    1.99  insn per cycle         
+       0.440693366 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1801) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.137610e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.760529e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.760529e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.125580e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.589773e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.589773e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293562e-05 )  GeV^0
-TOTAL       :     0.429722 sec
+TOTAL       :     0.422774 sec
 INFO: No Floating Point Exceptions have been reported
-     1,269,495,412      cycles                           #    2.915 GHz                    
-     2,496,260,070      instructions                     #    1.97  insn per cycle         
-       0.436264737 seconds time elapsed
+     1,213,663,106      cycles                           #    2.839 GHz                    
+     2,455,056,340      instructions                     #    2.02  insn per cycle         
+       0.428143641 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1764) (512y:    1) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.044380e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.291761e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.291761e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.932782e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.989263e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.989263e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293561e-05 )  GeV^0
-TOTAL       :     0.438334 sec
+TOTAL       :     0.445733 sec
 INFO: No Floating Point Exceptions have been reported
-     1,106,020,121      cycles                           #    2.491 GHz                    
-     2,096,224,924      instructions                     #    1.90  insn per cycle         
-       0.444840756 seconds time elapsed
+     1,066,104,747      cycles                           #    2.367 GHz                    
+     2,055,437,063      instructions                     #    1.93  insn per cycle         
+       0.451267064 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  934) (512y:    5) (512z: 1271)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
index 097ec6962d..4acbefe345 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 02s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-08_20:38:12
+DATE: 2024-08-30_00:08:36
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.657009e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.040901e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.368076e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.660194e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.763645e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.371516e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.510823 sec
+TOTAL       :     0.518809 sec
 INFO: No Floating Point Exceptions have been reported
-     2,202,406,007      cycles                           #    2.933 GHz                    
-     3,131,483,968      instructions                     #    1.42  insn per cycle         
-       0.809574698 seconds time elapsed
+     2,144,176,882      cycles                           #    2.847 GHz                    
+     3,054,167,922      instructions                     #    1.42  insn per cycle         
+       0.809452032 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 132
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.987871e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.027797e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.027797e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.940769e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.019109e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.019109e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     1.312691 sec
+TOTAL       :     1.286203 sec
 INFO: No Floating Point Exceptions have been reported
-     3,886,479,162      cycles                           #    2.942 GHz                    
-     9,876,785,784      instructions                     #    2.54  insn per cycle         
-       1.321966236 seconds time elapsed
+     3,776,980,405      cycles                           #    2.925 GHz                    
+     9,745,669,027      instructions                     #    2.58  insn per cycle         
+       1.292070140 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  338) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.603482e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.083956e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.083956e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.493346e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.916716e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.916716e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.795166 sec
+TOTAL       :     0.815959 sec
 INFO: No Floating Point Exceptions have been reported
-     2,395,751,097      cycles                           #    2.981 GHz                    
-     6,041,369,753      instructions                     #    2.52  insn per cycle         
-       0.804292816 seconds time elapsed
+     2,288,270,177      cycles                           #    2.787 GHz                    
+     5,912,884,833      instructions                     #    2.58  insn per cycle         
+       0.821822014 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1409) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.333538e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.457835e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.457835e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.286080e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.371020e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.371020e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.593950 sec
+TOTAL       :     0.572108 sec
 INFO: No Floating Point Exceptions have been reported
-     1,751,397,279      cycles                           #    2.907 GHz                    
-     3,381,419,349      instructions                     #    1.93  insn per cycle         
-       0.603155882 seconds time elapsed
+     1,616,702,363      cycles                           #    2.801 GHz                    
+     3,250,741,760      instructions                     #    2.01  insn per cycle         
+       0.577859574 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1555) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.383716e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.579987e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.579987e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.334033e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.467430e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.467430e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.584649 sec
+TOTAL       :     0.562193 sec
 INFO: No Floating Point Exceptions have been reported
-     1,722,820,866      cycles                           #    2.904 GHz                    
-     3,335,061,421      instructions                     #    1.94  insn per cycle         
-       0.593900292 seconds time elapsed
+     1,592,350,337      cycles                           #    2.807 GHz                    
+     3,206,636,539      instructions                     #    2.01  insn per cycle         
+       0.567886333 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1434) (512y:  101) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.223321e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.217067e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.217067e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.160554e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.100255e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.100255e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.618111 sec
+TOTAL       :     0.600536 sec
 INFO: No Floating Point Exceptions have been reported
-     1,474,024,650      cycles                           #    2.351 GHz                    
-     2,505,057,782      instructions                     #    1.70  insn per cycle         
-       0.627415589 seconds time elapsed
+     1,346,255,520      cycles                           #    2.223 GHz                    
+     2,374,224,628      instructions                     #    1.76  insn per cycle         
+       0.606410866 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  744) (512y:   64) (512z: 1062)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt
index 909ea75534..1a9f8ec0a0 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 54s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-08_20:38:24
+DATE: 2024-08-30_00:08:49
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.791313e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.626392e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.791667e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.788888e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.340980e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.793695e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.506993 sec
+TOTAL       :     0.515813 sec
 INFO: No Floating Point Exceptions have been reported
-     2,160,282,873      cycles                           #    2.928 GHz                    
-     3,104,863,193      instructions                     #    1.44  insn per cycle         
-       0.795042821 seconds time elapsed
+     2,143,423,318      cycles                           #    2.860 GHz                    
+     3,067,491,467      instructions                     #    1.43  insn per cycle         
+       0.806493056 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.274915e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.058342e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.058342e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.981536e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.025136e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.025136e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     1.272460 sec
+TOTAL       :     1.279928 sec
 INFO: No Floating Point Exceptions have been reported
-     3,870,727,422      cycles                           #    3.021 GHz                    
-     9,766,927,758      instructions                     #    2.52  insn per cycle         
-       1.281884523 seconds time elapsed
+     3,757,035,204      cycles                           #    2.924 GHz                    
+     9,636,078,564      instructions                     #    2.56  insn per cycle         
+       1.285735539 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  356) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.623095e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.126207e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.126207e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.550168e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.006120e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.006120e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.787281 sec
+TOTAL       :     0.788402 sec
 INFO: No Floating Point Exceptions have been reported
-     2,408,985,457      cycles                           #    3.026 GHz                    
-     5,983,716,153      instructions                     #    2.48  insn per cycle         
-       0.796654714 seconds time elapsed
+     2,290,168,715      cycles                           #    2.887 GHz                    
+     5,855,355,373      instructions                     #    2.56  insn per cycle         
+       0.794115436 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1367) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.282374e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.352435e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.352435e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.244805e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.317031e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.317031e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.601451 sec
+TOTAL       :     0.580569 sec
 INFO: No Floating Point Exceptions have been reported
-     1,779,110,472      cycles                           #    2.917 GHz                    
-     3,343,155,447      instructions                     #    1.88  insn per cycle         
-       0.610581817 seconds time elapsed
+     1,633,570,710      cycles                           #    2.789 GHz                    
+     3,214,560,636      instructions                     #    1.97  insn per cycle         
+       0.586312446 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1471) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.404645e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.636849e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.636849e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.358491e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.525177e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.525177e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.577304 sec
+TOTAL       :     0.556422 sec
 INFO: No Floating Point Exceptions have been reported
-     1,713,534,680      cycles                           #    2.924 GHz                    
-     3,304,839,422      instructions                     #    1.93  insn per cycle         
-       0.586559957 seconds time elapsed
+     1,578,013,364      cycles                           #    2.811 GHz                    
+     3,178,344,168      instructions                     #    2.01  insn per cycle         
+       0.561965085 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1370) (512y:  101) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.274336e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.329961e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.329961e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.190170e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.161682e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.161682e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.603476 sec
+TOTAL       :     0.591028 sec
 INFO: No Floating Point Exceptions have been reported
-     1,481,795,981      cycles                           #    2.421 GHz                    
-     2,484,912,045      instructions                     #    1.68  insn per cycle         
-       0.612779368 seconds time elapsed
+     1,349,381,848      cycles                           #    2.264 GHz                    
+     2,358,384,697      instructions                     #    1.75  insn per cycle         
+       0.596719851 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  692) (512y:   64) (512z: 1053)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
index 23a45578df..e62b93b708 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 17s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:35:05
+DATE: 2024-08-30_00:05:27
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.006324e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.190183e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.288100e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.645125e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.174225e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.284950e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     0.519336 sec
+TOTAL       :     0.530944 sec
 INFO: No Floating Point Exceptions have been reported
-     2,213,490,510      cycles                           #    2.944 GHz                    
-     3,142,609,105      instructions                     #    1.42  insn per cycle         
-       0.808787239 seconds time elapsed
+     2,186,537,838      cycles                           #    2.869 GHz                    
+     3,159,166,393      instructions                     #    1.44  insn per cycle         
+       0.821865584 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.848625e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.896982e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.896982e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.819724e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.866410e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.866410e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     5.805390 sec
+TOTAL       :     5.870423 sec
 INFO: No Floating Point Exceptions have been reported
-    17,322,328,356      cycles                           #    2.980 GHz                    
-    46,027,314,744      instructions                     #    2.66  insn per cycle         
-       5.814672958 seconds time elapsed
+    17,214,056,104      cycles                           #    2.930 GHz                    
+    45,926,568,401      instructions                     #    2.67  insn per cycle         
+       5.876101310 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  623) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.232999e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.394305e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.394305e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.190034e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.347526e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.347526e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.377455 sec
+TOTAL       :     3.395213 sec
 INFO: No Floating Point Exceptions have been reported
-    10,089,219,468      cycles                           #    2.980 GHz                    
-    27,901,985,402      instructions                     #    2.77  insn per cycle         
-       3.386689562 seconds time elapsed
+     9,982,315,760      cycles                           #    2.936 GHz                    
+    27,799,777,118      instructions                     #    2.78  insn per cycle         
+       3.401040734 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.131636e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.534601e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.534601e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.999649e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.380612e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.380612e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.174966 sec
+TOTAL       :     2.205225 sec
 INFO: No Floating Point Exceptions have been reported
-     6,180,272,446      cycles                           #    2.831 GHz                    
-    12,679,670,239      instructions                     #    2.05  insn per cycle         
-       2.183950081 seconds time elapsed
+     6,075,288,503      cycles                           #    2.749 GHz                    
+    12,582,306,777      instructions                     #    2.07  insn per cycle         
+       2.211180421 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2613) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.604193e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.099182e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.099182e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.454536e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.904963e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.904963e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.003125 sec
+TOTAL       :     2.030596 sec
 INFO: No Floating Point Exceptions have been reported
-     5,696,944,820      cycles                           #    2.832 GHz                    
-    12,097,133,291      instructions                     #    2.12  insn per cycle         
-       2.012150160 seconds time elapsed
+     5,581,988,175      cycles                           #    2.742 GHz                    
+    11,997,380,759      instructions                     #    2.15  insn per cycle         
+       2.036484083 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2356) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.648289e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.842846e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.842846e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.473092e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.649502e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.649502e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.006654 sec
+TOTAL       :     3.126818 sec
 INFO: No Floating Point Exceptions have been reported
-     5,848,300,882      cycles                           #    1.940 GHz                    
-     8,438,808,313      instructions                     #    1.44  insn per cycle         
-       3.015775673 seconds time elapsed
+     5,723,367,750      cycles                           #    1.828 GHz                    
+     8,340,914,234      instructions                     #    1.46  insn per cycle         
+       3.132596047 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1456) (512y:  122) (512z: 1805)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt
index 084acffe25..dd7dc97e3b 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 55s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:35:30
+DATE: 2024-08-30_00:05:52
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.973192e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.180411e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.278662e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.652880e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.169554e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.279516e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     0.518873 sec
+TOTAL       :     0.531091 sec
 INFO: No Floating Point Exceptions have been reported
-     2,217,952,324      cycles                           #    2.952 GHz                    
-     3,211,075,681      instructions                     #    1.45  insn per cycle         
-       0.807521486 seconds time elapsed
+     2,192,996,113      cycles                           #    2.837 GHz                    
+     3,138,073,320      instructions                     #    1.43  insn per cycle         
+       0.831810121 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.919771e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.971109e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.971109e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.864790e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.913608e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.913608e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     5.589458 sec
+TOTAL       :     5.729763 sec
 INFO: No Floating Point Exceptions have been reported
-    16,851,504,003      cycles                           #    3.011 GHz                    
-    45,007,980,146      instructions                     #    2.67  insn per cycle         
-       5.597787166 seconds time elapsed
+    16,766,275,374      cycles                           #    2.924 GHz                    
+    44,912,110,298      instructions                     #    2.68  insn per cycle         
+       5.735460834 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  567) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.433331e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.615119e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.615119e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.345782e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.518358e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.518358e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.183428 sec
+TOTAL       :     3.241480 sec
 INFO: No Floating Point Exceptions have been reported
-     9,605,830,601      cycles                           #    3.010 GHz                    
-    26,781,992,422      instructions                     #    2.79  insn per cycle         
-       3.191879831 seconds time elapsed
+     9,507,410,834      cycles                           #    2.929 GHz                    
+    26,685,640,419      instructions                     #    2.81  insn per cycle         
+       3.247226358 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2330) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.719654e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.056760e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.056760e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.592169e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.910029e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.910029e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.350234 sec
+TOTAL       :     2.390784 sec
 INFO: No Floating Point Exceptions have been reported
-     6,680,473,802      cycles                           #    2.833 GHz                    
-    14,206,471,082      instructions                     #    2.13  insn per cycle         
-       2.358807267 seconds time elapsed
+     6,586,629,911      cycles                           #    2.750 GHz                    
+    14,105,772,712      instructions                     #    2.14  insn per cycle         
+       2.396587613 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2697) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.858381e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.210770e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.210770e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.780110e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.122504e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.122504e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.286934 sec
+TOTAL       :     2.299915 sec
 INFO: No Floating Point Exceptions have been reported
-     6,467,572,645      cycles                           #    2.819 GHz                    
-    13,805,117,271      instructions                     #    2.13  insn per cycle         
-       2.295500484 seconds time elapsed
+     6,327,227,681      cycles                           #    2.745 GHz                    
+    13,699,353,623      instructions                     #    2.17  insn per cycle         
+       2.305670472 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2348) (512y:  297) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.556078e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.738376e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.738376e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.358446e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.524734e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.524734e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.078127 sec
+TOTAL       :     3.229645 sec
 INFO: No Floating Point Exceptions have been reported
-     6,022,357,803      cycles                           #    1.952 GHz                    
-    10,198,455,945      instructions                     #    1.69  insn per cycle         
-       3.086650563 seconds time elapsed
+     5,923,380,282      cycles                           #    1.832 GHz                    
+    10,098,206,916      instructions                     #    1.70  insn per cycle         
+       3.235349944 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1306) (512y:  208) (512z: 1985)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
index 3eab9e9753..2ac6c8341c 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 03s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:35:54
+DATE: 2024-08-30_00:06:17
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.671843e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.219611e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.398007e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.712341e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.185127e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.398560e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072877e+00 +- 3.361153e-03 )  GeV^0
-TOTAL       :     0.483015 sec
+TOTAL       :     0.486465 sec
 INFO: No Floating Point Exceptions have been reported
-     2,057,665,691      cycles                           #    2.919 GHz                    
-     2,974,139,215      instructions                     #    1.45  insn per cycle         
-       0.763755746 seconds time elapsed
+     2,023,703,635      cycles                           #    2.842 GHz                    
+     2,905,407,935      instructions                     #    1.44  insn per cycle         
+       0.768933327 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.976573e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.032296e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.032296e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.920991e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.974908e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.974908e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072937e+00 +- 3.361545e-03 )  GeV^0
-TOTAL       :     5.392550 sec
+TOTAL       :     5.544585 sec
 INFO: No Floating Point Exceptions have been reported
-    16,223,721,004      cycles                           #    3.006 GHz                    
-    45,343,520,122      instructions                     #    2.79  insn per cycle         
-       5.398630583 seconds time elapsed
+    16,210,015,502      cycles                           #    2.921 GHz                    
+    45,320,815,835      instructions                     #    2.80  insn per cycle         
+       5.550177665 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  601) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.606915e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.959618e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.959618e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.506457e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.846418e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.846418e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072937e+00 +- 3.361544e-03 )  GeV^0
-TOTAL       :     2.365944 sec
+TOTAL       :     2.413252 sec
 INFO: No Floating Point Exceptions have been reported
-     7,142,483,054      cycles                           #    3.012 GHz                    
-    17,793,150,450      instructions                     #    2.49  insn per cycle         
-       2.371767516 seconds time elapsed
+     7,077,065,356      cycles                           #    2.927 GHz                    
+    17,770,661,239      instructions                     #    2.51  insn per cycle         
+       2.418827542 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3136) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.534145e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.726326e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.726326e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.282819e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.390404e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.390404e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     1.317221 sec
+TOTAL       :     1.351583 sec
 INFO: No Floating Point Exceptions have been reported
-     3,766,549,622      cycles                           #    2.849 GHz                    
-     8,281,231,591      instructions                     #    2.20  insn per cycle         
-       1.323030863 seconds time elapsed
+     3,739,036,824      cycles                           #    2.757 GHz                    
+     8,260,771,297      instructions                     #    2.21  insn per cycle         
+       1.357115595 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3355) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.037857e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.038500e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.038500e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.783112e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.003814e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.003814e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     1.247672 sec
+TOTAL       :     1.280052 sec
 INFO: No Floating Point Exceptions have been reported
-     3,572,380,687      cycles                           #    2.852 GHz                    
-     7,938,220,748      instructions                     #    2.22  insn per cycle         
-       1.253461191 seconds time elapsed
+     3,542,795,885      cycles                           #    2.758 GHz                    
+     7,916,136,647      instructions                     #    2.23  insn per cycle         
+       1.285449312 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3201) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.780907e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.464899e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.464899e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.415170e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.052151e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.052151e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     1.635161 sec
+TOTAL       :     1.721159 sec
 INFO: No Floating Point Exceptions have been reported
-     3,277,760,479      cycles                           #    1.999 GHz                    
-     6,118,650,971      instructions                     #    1.87  insn per cycle         
-       1.640889669 seconds time elapsed
+     3,264,706,701      cycles                           #    1.893 GHz                    
+     6,097,584,670      instructions                     #    1.87  insn per cycle         
+       1.726769804 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2294) (512y:   24) (512z: 2154)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt
index 95f2f81a67..bb153103dc 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 54s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:36:15
+DATE: 2024-08-30_00:06:38
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.014048e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.487826e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.715050e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.119307e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.455409e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.728882e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072877e+00 +- 3.361153e-03 )  GeV^0
-TOTAL       :     0.479773 sec
+TOTAL       :     0.486421 sec
 INFO: No Floating Point Exceptions have been reported
-     2,021,404,320      cycles                           #    2.871 GHz                    
-     2,909,718,804      instructions                     #    1.44  insn per cycle         
-       0.763747586 seconds time elapsed
+     2,033,006,308      cycles                           #    2.862 GHz                    
+     2,873,303,358      instructions                     #    1.41  insn per cycle         
+       0.769599438 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.015289e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.073220e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.073220e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.956818e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.013845e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.013845e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072937e+00 +- 3.361545e-03 )  GeV^0
-TOTAL       :     5.290195 sec
+TOTAL       :     5.444034 sec
 INFO: No Floating Point Exceptions have been reported
-    15,992,452,194      cycles                           #    3.020 GHz                    
-    44,447,001,670      instructions                     #    2.78  insn per cycle         
-       5.296101650 seconds time elapsed
+    15,986,130,227      cycles                           #    2.934 GHz                    
+    44,427,962,929      instructions                     #    2.78  insn per cycle         
+       5.449527371 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  534) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.486417e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.979858e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.979858e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.304706e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.775665e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.775665e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072937e+00 +- 3.361544e-03 )  GeV^0
-TOTAL       :     2.001515 sec
+TOTAL       :     2.062387 sec
 INFO: No Floating Point Exceptions have been reported
-     6,083,399,365      cycles                           #    3.032 GHz                    
-    17,096,762,778      instructions                     #    2.81  insn per cycle         
-       2.007478242 seconds time elapsed
+     6,060,207,395      cycles                           #    2.932 GHz                    
+    17,068,760,146      instructions                     #    2.82  insn per cycle         
+       2.067941987 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.273384e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.901765e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.901765e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.034117e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.607363e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.607363e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     1.760820 sec
+TOTAL       :     1.822223 sec
 INFO: No Floating Point Exceptions have been reported
-     5,038,046,690      cycles                           #    2.853 GHz                    
-    10,244,068,560      instructions                     #    2.03  insn per cycle         
-       1.766743334 seconds time elapsed
+     5,017,549,044      cycles                           #    2.747 GHz                    
+    10,219,780,372      instructions                     #    2.04  insn per cycle         
+       1.827614223 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3892) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.352422e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.995021e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.995021e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.106149e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.683031e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.683031e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     1.739024 sec
+TOTAL       :     1.802302 sec
 INFO: No Floating Point Exceptions have been reported
-     4,995,379,501      cycles                           #    2.864 GHz                    
-    10,014,742,907      instructions                     #    2.00  insn per cycle         
-       1.744931983 seconds time elapsed
+     4,959,754,152      cycles                           #    2.745 GHz                    
+     9,989,877,535      instructions                     #    2.01  insn per cycle         
+       1.807732961 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3793) (512y:    2) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.909740e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.260066e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.260066e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.617211e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.936164e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.936164e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     2.224170 sec
+TOTAL       :     2.356646 sec
 INFO: No Floating Point Exceptions have been reported
-     4,384,022,767      cycles                           #    1.967 GHz                    
-     8,465,829,971      instructions                     #    1.93  insn per cycle         
-       2.230123024 seconds time elapsed
+     4,364,327,832      cycles                           #    1.848 GHz                    
+     8,442,316,116      instructions                     #    1.93  insn per cycle         
+       2.362315380 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2782) (512y:    4) (512z: 2752)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
index 3f2b21ab02..b209de599f 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 01m 03s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:36:36
+DATE: 2024-08-30_00:07:00
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.111342e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.183781e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.280569e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.640674e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.169556e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.279935e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     0.516736 sec
+TOTAL       :     0.528492 sec
 INFO: No Floating Point Exceptions have been reported
-     2,204,839,521      cycles                           #    2.950 GHz                    
-     3,193,475,947      instructions                     #    1.45  insn per cycle         
-       0.804039579 seconds time elapsed
+     2,185,199,416      cycles                           #    2.838 GHz                    
+     3,106,274,692      instructions                     #    1.42  insn per cycle         
+       0.827757615 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.851387e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.898716e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.898716e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.793535e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.838632e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.838632e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     5.792449 sec
+TOTAL       :     5.953704 sec
 INFO: No Floating Point Exceptions have been reported
-    17,478,048,232      cycles                           #    3.014 GHz                    
-    46,175,878,133      instructions                     #    2.64  insn per cycle         
-       5.800949907 seconds time elapsed
+    17,390,993,785      cycles                           #    2.919 GHz                    
+    46,078,480,657      instructions                     #    2.65  insn per cycle         
+       5.959481036 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  623) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.302826e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.471365e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.471365e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.191739e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.349907e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.349907e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.305610 sec
+TOTAL       :     3.393170 sec
 INFO: No Floating Point Exceptions have been reported
-    10,029,884,170      cycles                           #    3.027 GHz                    
-    27,698,012,954      instructions                     #    2.76  insn per cycle         
-       3.314264877 seconds time elapsed
+     9,945,468,646      cycles                           #    2.927 GHz                    
+    27,597,530,111      instructions                     #    2.77  insn per cycle         
+       3.398902714 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.212203e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.631040e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.631040e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.990071e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.366649e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.366649e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.141280 sec
+TOTAL       :     2.209354 sec
 INFO: No Floating Point Exceptions have been reported
-     6,126,755,092      cycles                           #    2.851 GHz                    
-    12,585,784,837      instructions                     #    2.05  insn per cycle         
-       2.149799113 seconds time elapsed
+     6,023,134,211      cycles                           #    2.720 GHz                    
+    12,486,085,370      instructions                     #    2.07  insn per cycle         
+       2.215108609 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2765) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.714807e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.220314e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.220314e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.529154e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.998828e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.998828e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     1.966130 sec
+TOTAL       :     2.004558 sec
 INFO: No Floating Point Exceptions have been reported
-     5,614,473,659      cycles                           #    2.844 GHz                    
-    12,019,662,665      instructions                     #    2.14  insn per cycle         
-       1.974902809 seconds time elapsed
+     5,485,809,258      cycles                           #    2.730 GHz                    
+    11,922,343,140      instructions                     #    2.17  insn per cycle         
+       2.010441607 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2510) (512y:  146) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.735274e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.937488e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.937488e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.563890e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.750749e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.750749e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.937106 sec
+TOTAL       :     3.050811 sec
 INFO: No Floating Point Exceptions have been reported
-     5,684,383,017      cycles                           #    1.930 GHz                    
-     8,211,471,869      instructions                     #    1.44  insn per cycle         
-       2.945845267 seconds time elapsed
+     5,611,379,647      cycles                           #    1.836 GHz                    
+     8,110,650,078      instructions                     #    1.45  insn per cycle         
+       3.056719977 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1646) (512y:  126) (512z: 1865)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt
index 9ec77e6c2c..9758c0e4fb 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt
@@ -1,4 +1,8 @@
 
+------------------------------------------------
+Preliminary build completed in 0d 00h 00m 54s
+------------------------------------------------
+
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -40,7 +44,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-08_20:37:00
+DATE: 2024-08-30_00:07:25
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -49,15 +53,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.087294e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.176774e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.273815e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.640042e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.163678e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.273008e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     0.521745 sec
+TOTAL       :     0.528347 sec
 INFO: No Floating Point Exceptions have been reported
-     2,190,333,356      cycles                           #    2.907 GHz                    
-     3,117,272,451      instructions                     #    1.42  insn per cycle         
-       0.811246203 seconds time elapsed
+     2,195,921,164      cycles                           #    2.863 GHz                    
+     3,159,163,941      instructions                     #    1.44  insn per cycle         
+       0.824586714 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -82,15 +86,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.899666e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.949679e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.949679e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.848710e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.896952e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.896952e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     5.649808 sec
+TOTAL       :     5.778145 sec
 INFO: No Floating Point Exceptions have been reported
-    17,042,397,704      cycles                           #    3.012 GHz                    
-    45,200,059,180      instructions                     #    2.65  insn per cycle         
-       5.658309716 seconds time elapsed
+    16,939,093,132      cycles                           #    2.929 GHz                    
+    45,097,100,323      instructions                     #    2.66  insn per cycle         
+       5.783860430 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  568) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe
@@ -111,15 +115,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.442760e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.623868e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.623868e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.316686e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.486642e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.486642e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.175173 sec
+TOTAL       :     3.267618 sec
 INFO: No Floating Point Exceptions have been reported
-     9,616,707,948      cycles                           #    3.021 GHz                    
-    26,345,303,385      instructions                     #    2.74  insn per cycle         
-       3.183844820 seconds time elapsed
+     9,524,753,937      cycles                           #    2.911 GHz                    
+    26,244,236,470      instructions                     #    2.76  insn per cycle         
+       3.273278507 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2385) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
@@ -140,15 +144,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.409096e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.707370e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.707370e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.499453e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.802367e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.802367e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.509673 sec
+TOTAL       :     2.437149 sec
 INFO: No Floating Point Exceptions have been reported
-     6,823,505,729      cycles                           #    2.711 GHz                    
-    14,133,345,545      instructions                     #    2.07  insn per cycle         
-       2.518344311 seconds time elapsed
+     6,708,501,181      cycles                           #    2.747 GHz                    
+    14,027,891,193      instructions                     #    2.09  insn per cycle         
+       2.442844457 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2883) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
@@ -169,15 +173,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.915857e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.278986e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.278986e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.730475e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.065255e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.065255e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.261621 sec
+TOTAL       :     2.323336 sec
 INFO: No Floating Point Exceptions have been reported
-     6,478,665,786      cycles                           #    2.855 GHz                    
-    13,612,638,339      instructions                     #    2.10  insn per cycle         
-       2.270008014 seconds time elapsed
+     6,393,451,931      cycles                           #    2.746 GHz                    
+    13,511,993,077      instructions                     #    2.11  insn per cycle         
+       2.329209524 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2519) (512y:  302) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe
@@ -198,15 +202,15 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.779798e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.989152e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.989152e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.589660e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.781904e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.781904e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.903794 sec
+TOTAL       :     3.028874 sec
 INFO: No Floating Point Exceptions have been reported
-     5,684,727,855      cycles                           #    1.953 GHz                    
-     9,307,942,112      instructions                     #    1.64  insn per cycle         
-       2.912446958 seconds time elapsed
+     5,571,310,646      cycles                           #    1.837 GHz                    
+     9,204,211,713      instructions                     #    1.65  insn per cycle         
+       3.034661225 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1431) (512y:  212) (512z: 2058)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe

From 1fbe87a13fc194b7673fa5c3ed56c1a2f9848a4c Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 30 Aug 2024 07:16:16 +0200
Subject: [PATCH 35/50] [helas] rerun 30 tmad tests on itscrd90 - all as
 expected (failures in heft #833)

STARTED  AT Fri Aug 30 12:48:22 AM CEST 2024
(SM tests)
ENDED(1) AT Fri Aug 30 05:04:05 AM CEST 2024 [Status=0]
(BSM tests)
ENDED(1) AT Fri Aug 30 05:14:35 AM CEST 2024 [Status=0]

24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
---
 .../log_eemumu_mad_d_inl0_hrd0.txt            | 136 +++++++--------
 .../log_eemumu_mad_f_inl0_hrd0.txt            | 146 ++++++++--------
 .../log_eemumu_mad_m_inl0_hrd0.txt            | 136 +++++++--------
 .../log_ggtt_mad_d_inl0_hrd0.txt              | 134 +++++++--------
 .../log_ggtt_mad_f_inl0_hrd0.txt              | 134 +++++++--------
 .../log_ggtt_mad_m_inl0_hrd0.txt              | 136 +++++++--------
 .../log_ggttg_mad_d_inl0_hrd0.txt             | 138 +++++++--------
 .../log_ggttg_mad_f_inl0_hrd0.txt             | 146 ++++++++--------
 .../log_ggttg_mad_m_inl0_hrd0.txt             | 136 +++++++--------
 .../log_ggttgg_mad_d_inl0_hrd0.txt            | 154 ++++++++---------
 .../log_ggttgg_mad_f_inl0_hrd0.txt            | 156 ++++++++---------
 .../log_ggttgg_mad_m_inl0_hrd0.txt            | 152 ++++++++---------
 .../log_ggttggg_mad_d_inl0_hrd0.txt           | 156 ++++++++---------
 .../log_ggttggg_mad_f_inl0_hrd0.txt           | 154 ++++++++---------
 .../log_ggttggg_mad_m_inl0_hrd0.txt           | 158 +++++++++---------
 .../log_gqttq_mad_d_inl0_hrd0.txt             | 136 +++++++--------
 .../log_gqttq_mad_f_inl0_hrd0.txt             | 138 +++++++--------
 .../log_gqttq_mad_m_inl0_hrd0.txt             | 140 ++++++++--------
 .../log_heftggbb_mad_d_inl0_hrd0.txt          | 134 +++++++--------
 .../log_heftggbb_mad_f_inl0_hrd0.txt          |  34 ++--
 .../log_heftggbb_mad_m_inl0_hrd0.txt          | 142 ++++++++--------
 .../log_smeftggtttt_mad_d_inl0_hrd0.txt       | 146 ++++++++--------
 .../log_smeftggtttt_mad_f_inl0_hrd0.txt       | 154 ++++++++---------
 .../log_smeftggtttt_mad_m_inl0_hrd0.txt       | 158 +++++++++---------
 .../log_susyggt1t1_mad_d_inl0_hrd0.txt        | 132 +++++++--------
 .../log_susyggt1t1_mad_f_inl0_hrd0.txt        | 130 +++++++-------
 .../log_susyggt1t1_mad_m_inl0_hrd0.txt        | 128 +++++++-------
 .../log_susyggtt_mad_d_inl0_hrd0.txt          | 138 +++++++--------
 .../log_susyggtt_mad_f_inl0_hrd0.txt          | 134 +++++++--------
 .../log_susyggtt_mad_m_inl0_hrd0.txt          | 142 ++++++++--------
 30 files changed, 2079 insertions(+), 2079 deletions(-)

diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
index 01107f564b..905d729f9f 100644
--- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
@@ -1,10 +1,10 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 
 
+
 make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
-
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:42:55
+DATE: 2024-08-30_00:51:22
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
@@ -58,8 +58,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 3798 events (found 8192 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6950s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6868s
+ [COUNTERS] PROGRAM TOTAL          :    0.7770s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7689s
  [COUNTERS] Fortran MEs      ( 1 ) :    0.0082s for     8192 events => throughput is 1.00E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1770s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1693s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0077s for     8192 events => throughput is 1.07E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1876s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1795s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0082s for     8192 events => throughput is 1.01E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3730s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2895s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0835s for    90112 events => throughput is 1.08E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3850s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2996s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0854s for    90112 events => throughput is 1.06E+06 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1777s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1702s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0072s for     8192 events => throughput is 1.14E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    0.1832s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1756s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0072s for     8192 events => throughput is 1.13E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3648s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2879s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0766s for    90112 events => throughput is 1.18E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    0.3761s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2962s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0795s for    90112 events => throughput is 1.13E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.167196e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.150373e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.165900e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.153459e+06                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1752s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1704s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0045s for     8192 events => throughput is 1.83E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    0.1814s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1765s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0046s for     8192 events => throughput is 1.80E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3353s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2887s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0463s for    90112 events => throughput is 1.94E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3434s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2950s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0480s for    90112 events => throughput is 1.88E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.918558e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.890693e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.023579e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.963061e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1786s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1750s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0033s for     8192 events => throughput is 2.48E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1825s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1788s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0034s for     8192 events => throughput is 2.40E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3295s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2928s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0363s for    90112 events => throughput is 2.48E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3319s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2956s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0359s for    90112 events => throughput is 2.51E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.640473e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.565379e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.831088e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.720140e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1752s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1718s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.65E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1800s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1763s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0034s for     8192 events => throughput is 2.39E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3209s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2867s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0338s for    90112 events => throughput is 2.66E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3314s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2959s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0352s for    90112 events => throughput is 2.56E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.678759e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.605905e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.813366e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.811741e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1736s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1692s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0040s for     8192 events => throughput is 2.04E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1801s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1754s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0042s for     8192 events => throughput is 1.93E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3322s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2913s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0405s for    90112 events => throughput is 2.22E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3433s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2992s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0437s for    90112 events => throughput is 2.06E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.108602e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.041894e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.253882e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.231255e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6096s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6084s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.32E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6180s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6168s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.42E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7166s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7111s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0049s for    90112 events => throughput is 1.84E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7402s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7344s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0051s for    90112 events => throughput is 1.75E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.377977e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.310247e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.939853e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.937972e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.088090e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.121321e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.478718e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.494743e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.243737e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.121828e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.989285e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.011369e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.238682e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.127939e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.131222e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.127590e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
index 617aae1ec8..b3dd9fc681 100644
--- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
@@ -1,22 +1,22 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 
 
-
 make USEBUILDDIR=1 BACKEND=cuda
+
 make USEBUILDDIR=1 BACKEND=cppnone
-make USEBUILDDIR=1 BACKEND=cppsse4
 
+make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
+
+make USEBUILDDIR=1 BACKEND=cpp512y
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-
-make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:43:11
+DATE: 2024-08-30_00:51:39
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 3798 events (found 8192 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7259s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7175s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0084s for     8192 events => throughput is 9.72E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7299s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7218s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0081s for     8192 events => throughput is 1.01E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1878s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1797s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0081s for     8192 events => throughput is 1.01E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1894s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1815s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0079s for     8192 events => throughput is 1.04E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3875s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3018s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0857s for    90112 events => throughput is 1.05E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3829s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2975s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0854s for    90112 events => throughput is 1.05E+06 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1866s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1794s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0070s for     8192 events => throughput is 1.17E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1854s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1780s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0071s for     8192 events => throughput is 1.16E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3784s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3020s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0761s for    90112 events => throughput is 1.18E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3735s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2979s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0753s for    90112 events => throughput is 1.20E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.232262e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.222498e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.234403e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.223515e+06                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1808s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1776s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.88E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    0.1791s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1762s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.02E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3276s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2977s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0297s for    90112 events => throughput is 3.03E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3264s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2967s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0295s for    90112 events => throughput is 3.06E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.119755e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.184263e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.282267e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.305557e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1827s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1799s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0025s for     8192 events => throughput is 3.27E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1821s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1792s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0026s for     8192 events => throughput is 3.14E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3317s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3038s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0276s for    90112 events => throughput is 3.26E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
+ [COUNTERS] PROGRAM TOTAL          :    0.3282s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3003s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0277s for    90112 events => throughput is 3.25E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.481016e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.324945e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.570800e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.611979e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1855s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1828s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0024s for     8192 events => throughput is 3.35E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1823s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1796s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0025s for     8192 events => throughput is 3.31E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,10 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3314s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3041s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0270s for    90112 events => throughput is 3.33E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    0.3274s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3004s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0268s for    90112 events => throughput is 3.37E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.644439e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.421728e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.697078e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.720848e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1845s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1814s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.04E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1835s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1805s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.08E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3372s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3079s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0290s for    90112 events => throughput is 3.10E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3317s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3033s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0282s for    90112 events => throughput is 3.20E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.387501e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.234507e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.616268e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.458576e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6084s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6073s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.48E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [COUNTERS] PROGRAM TOTAL          :    0.6162s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6151s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.46E+07 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7292s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7238s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0048s for    90112 events => throughput is 1.86E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7317s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7264s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0047s for    90112 events => throughput is 1.91E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.601368e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.706504e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.718163e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.545522e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.633474e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.466129e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.898384e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.910413e+09                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.829286e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.590166e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.104797e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.100934e+09                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.012752e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.844973e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.802072e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.803087e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
index e51bbf394d..6dc1fb2130 100644
--- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
@@ -20,9 +20,9 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:43:26
+DATE: 2024-08-30_00:51:54
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 3798 events (found 8192 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6983s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6906s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0077s for     8192 events => throughput is 1.06E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7125s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7045s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0081s for     8192 events => throughput is 1.02E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1791s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1711s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0080s for     8192 events => throughput is 1.02E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1842s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1761s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0081s for     8192 events => throughput is 1.01E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3694s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2869s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0825s for    90112 events => throughput is 1.09E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3846s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2988s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0858s for    90112 events => throughput is 1.05E+06 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1846s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1767s
+ [COUNTERS] PROGRAM TOTAL          :    0.1833s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1754s
  [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0075s for     8192 events => throughput is 1.09E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3660s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2865s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0792s for    90112 events => throughput is 1.14E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3781s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2967s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0811s for    90112 events => throughput is 1.11E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.124575e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.135905e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.154252e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.146239e+06                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1757s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1709s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0044s for     8192 events => throughput is 1.88E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    0.1797s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1749s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0044s for     8192 events => throughput is 1.86E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3336s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2878s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0455s for    90112 events => throughput is 1.98E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3400s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2935s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0462s for    90112 events => throughput is 1.95E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.982594e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.989312e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.052848e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.057240e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,10 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1749s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1711s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0035s for     8192 events => throughput is 2.31E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    0.1786s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1748s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0035s for     8192 events => throughput is 2.37E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3282s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2920s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0358s for    90112 events => throughput is 2.51E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    0.3305s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2941s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0360s for    90112 events => throughput is 2.51E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.552156e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.520248e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.649390e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.584398e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1744s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1708s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0033s for     8192 events => throughput is 2.47E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1789s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1753s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0033s for     8192 events => throughput is 2.51E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,10 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3217s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2876s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0337s for    90112 events => throughput is 2.67E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    0.3318s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2962s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0352s for    90112 events => throughput is 2.56E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.650509e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.641406e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.719714e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.794868e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1750s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1712s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0035s for     8192 events => throughput is 2.36E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.1799s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.1755s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0040s for     8192 events => throughput is 2.05E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3264s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2866s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0395s for    90112 events => throughput is 2.28E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3390s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2970s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0415s for    90112 events => throughput is 2.17E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.207219e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.196903e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.300574e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.277543e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.5992s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5980s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.38E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6049s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6037s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.47E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,8 +557,8 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7158s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7101s
+ [COUNTERS] PROGRAM TOTAL          :    0.7291s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7235s
  [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0050s for    90112 events => throughput is 1.80E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.054665e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.348676e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.970842e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.930073e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.242307e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.125050e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.491734e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.498988e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.221256e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.104130e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.104459e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.086621e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.208981e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.108048e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.160987e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.158026e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
index 8d24f348d7..931fcf1c66 100644
--- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
@@ -1,10 +1,10 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 
+
 make USEBUILDDIR=1 BACKEND=cuda
 
 
 make USEBUILDDIR=1 BACKEND=cppnone
-
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:43:42
+DATE: 2024-08-30_00:52:11
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
  [UNWEIGHT] Wrote 2601 events (found 5405 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8083s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7667s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0416s for     8192 events => throughput is 1.97E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8200s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7778s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0421s for     8192 events => throughput is 1.94E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4194s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3777s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0417s for     8192 events => throughput is 1.97E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4177s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3756s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0421s for     8192 events => throughput is 1.95E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7491s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2980s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4511s for    90112 events => throughput is 2.00E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7786s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3175s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4612s for    90112 events => throughput is 1.95E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4196s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3765s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0426s for     8192 events => throughput is 1.92E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4176s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3724s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0448s for     8192 events => throughput is 1.83E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,10 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7813s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2997s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4811s for    90112 events => throughput is 1.87E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    1.8262s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3290s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4967s for    90112 events => throughput is 1.81E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.879822e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.846425e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.903748e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.873611e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4107s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3853s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0250s for     8192 events => throughput is 3.28E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3985s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3726s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0255s for     8192 events => throughput is 3.21E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5717s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3004s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2709s for    90112 events => throughput is 3.33E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.6129s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3312s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2813s for    90112 events => throughput is 3.20E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.310019e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.265834e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.203674e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.263600e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3916s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3758s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0153s for     8192 events => throughput is 5.35E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3886s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3723s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0158s for     8192 events => throughput is 5.18E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4759s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3059s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1696s for    90112 events => throughput is 5.31E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4959s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3212s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1743s for    90112 events => throughput is 5.17E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.223657e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.224711e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.200982e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.211640e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3953s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3808s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0141s for     8192 events => throughput is 5.80E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3872s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3723s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0144s for     8192 events => throughput is 5.68E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4542s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3022s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1516s for    90112 events => throughput is 5.94E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4782s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3196s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1582s for    90112 events => throughput is 5.70E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.865744e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.751977e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.035557e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.909222e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4098s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3854s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0239s for     8192 events => throughput is 3.43E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3958s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3729s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0224s for     8192 events => throughput is 3.66E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5428s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3039s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2384s for    90112 events => throughput is 3.78E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.5779s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3255s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2519s for    90112 events => throughput is 3.58E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.669812e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.640250e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.898434e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.582672e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8047s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8033s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.27E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8092s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8078s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.31E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7304s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7231s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0066s for    90112 events => throughput is 1.37E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7627s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.7552s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0068s for    90112 events => throughput is 1.33E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.008892e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.145115e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.654647e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.615882e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.331472e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.253648e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.082448e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.083832e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.310542e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.252346e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.160861e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.158233e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.331806e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.242136e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.063253e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.076942e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
index 420861126b..7b5a930bcd 100644
--- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 
-make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
-make USEBUILDDIR=1 BACKEND=cppsse4
 
+make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -13,18 +13,18 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:44:09
+DATE: 2024-08-30_00:52:37
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
  [UNWEIGHT] Wrote 2601 events (found 5405 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8019s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7604s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0415s for     8192 events => throughput is 1.97E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8269s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7846s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0423s for     8192 events => throughput is 1.94E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4215s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3800s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0416s for     8192 events => throughput is 1.97E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4157s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3732s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0425s for     8192 events => throughput is 1.93E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7567s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3058s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4510s for    90112 events => throughput is 2.00E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7811s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3167s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4645s for    90112 events => throughput is 1.94E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094179692708323] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4203s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3790s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0410s for     8192 events => throughput is 2.00E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4144s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3720s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0420s for     8192 events => throughput is 1.95E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105688388783328] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7678s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3093s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4582s for    90112 events => throughput is 1.97E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7840s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3184s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4652s for    90112 events => throughput is 1.94E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.984608e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.954584e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.996032e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.971648e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094175707109216] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3923s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3751s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0170s for     8192 events => throughput is 4.83E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3908s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3733s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0173s for     8192 events => throughput is 4.75E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105684583433771] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4893s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3053s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1837s for    90112 events => throughput is 4.90E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.5105s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3203s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1899s for    90112 events => throughput is 4.75E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.831484e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.743399e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.765454e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.751002e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3873s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3779s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0091s for     8192 events => throughput is 8.99E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3840s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3746s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0090s for     8192 events => throughput is 9.10E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4091s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3116s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0972s for    90112 events => throughput is 9.27E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4196s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3180s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1013s for    90112 events => throughput is 8.89E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.995090e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.122881e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.148417e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.253637e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3894s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3807s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0085s for     8192 events => throughput is 9.68E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3836s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3748s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0086s for     8192 events => throughput is 9.55E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3961s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3040s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0917s for    90112 events => throughput is 9.82E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4130s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3183s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0944s for    90112 events => throughput is 9.54E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.994646e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.492627e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.882184e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.845068e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094178448427996] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3945s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3828s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0112s for     8192 events => throughput is 7.31E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3872s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3747s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0121s for     8192 events => throughput is 6.75E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105688391432061] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5017s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3657s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1356s for    90112 events => throughput is 6.64E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4540s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3208s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1329s for    90112 events => throughput is 6.78E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.837763e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.760706e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.925566e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.862105e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,8 +523,8 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184162782994] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8112s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8099s
+ [COUNTERS] PROGRAM TOTAL          :    0.8072s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8059s
  [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.43E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105694501043516] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7829s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7765s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0057s for    90112 events => throughput is 1.58E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7625s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.7557s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0061s for    90112 events => throughput is 1.49E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.085941e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.122844e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.178660e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.186028e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.983696e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.914924e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.406286e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.415260e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.010543e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.864435e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.536473e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.542103e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.527299e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.479715e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.475317e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.411070e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
index 65f004f30e..424c9d3f7b 100644
--- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
@@ -1,10 +1,10 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 
-make USEBUILDDIR=1 BACKEND=cuda
-
 
 
+make USEBUILDDIR=1 BACKEND=cuda
 make USEBUILDDIR=1 BACKEND=cppnone
+
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:44:34
+DATE: 2024-08-30_00:53:03
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
  [UNWEIGHT] Wrote 2601 events (found 5405 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8115s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7704s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0411s for     8192 events => throughput is 1.99E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8254s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7822s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0432s for     8192 events => throughput is 1.90E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4214s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3805s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0408s for     8192 events => throughput is 2.01E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4149s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3727s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0422s for     8192 events => throughput is 1.94E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7670s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3128s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4542s for    90112 events => throughput is 1.98E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7857s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3213s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4644s for    90112 events => throughput is 1.94E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4222s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3775s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0442s for     8192 events => throughput is 1.85E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4173s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3713s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0455s for     8192 events => throughput is 1.80E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,10 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7889s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3008s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4877s for    90112 events => throughput is 1.85E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    1.8209s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3186s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5019s for    90112 events => throughput is 1.80E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.863098e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.821084e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.876650e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.828762e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4042s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3795s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0242s for     8192 events => throughput is 3.38E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3975s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3723s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0247s for     8192 events => throughput is 3.32E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5750s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3065s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2680s for    90112 events => throughput is 3.36E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.5914s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3179s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2731s for    90112 events => throughput is 3.30E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.334875e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.312704e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.372227e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.281482e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3946s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3794s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0147s for     8192 events => throughput is 5.56E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3879s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3719s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0156s for     8192 events => throughput is 5.26E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4696s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3034s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1658s for    90112 events => throughput is 5.44E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4909s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3179s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1726s for    90112 events => throughput is 5.22E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.223051e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.294208e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.767945e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.336642e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4019s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3874s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0142s for     8192 events => throughput is 5.78E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3881s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3733s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0143s for     8192 events => throughput is 5.73E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4595s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3077s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1514s for    90112 events => throughput is 5.95E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4759s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3196s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1559s for    90112 events => throughput is 5.78E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.889622e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.832546e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.919078e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.943697e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4002s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3783s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0214s for     8192 events => throughput is 3.82E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3971s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3744s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0223s for     8192 events => throughput is 3.68E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5451s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3093s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2354s for    90112 events => throughput is 3.83E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.5696s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3242s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2449s for    90112 events => throughput is 3.68E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.737875e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.610993e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.863403e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.604644e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8029s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8014s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.26E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8101s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8086s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.25E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1
  [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7390s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7315s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0066s for    90112 events => throughput is 1.36E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7563s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.7487s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0067s for    90112 events => throughput is 1.34E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.004360e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.016383e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.618155e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.577646e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.337805e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.254487e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.064726e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.057057e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.321717e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.262558e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.141622e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.142842e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.487761e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.258563e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.948699e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.974623e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
index c52a8af2f9..9853411ba7 100644
--- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 
+
 make USEBUILDDIR=1 BACKEND=cuda
+make USEBUILDDIR=1 BACKEND=cppnone
 
 
-make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
-
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -13,8 +13,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:45:01
+DATE: 2024-08-30_00:53:30
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
  [UNWEIGHT] Wrote 365 events (found 1496 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6887s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3666s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3221s for     8192 events => throughput is 2.54E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7011s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3682s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3329s for     8192 events => throughput is 2.46E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6558s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3350s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3208s for     8192 events => throughput is 2.55E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6659s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3318s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3341s for     8192 events => throughput is 2.45E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.1103s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5412s
- [COUNTERS] Fortran MEs      ( 1 ) :    3.5692s for    90112 events => throughput is 2.52E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    5.2088s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5545s
+ [COUNTERS] Fortran MEs      ( 1 ) :    3.6542s for    90112 events => throughput is 2.47E+04 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6762s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3380s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3370s for     8192 events => throughput is 2.43E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6836s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3335s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3488s for     8192 events => throughput is 2.35E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.2687s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5495s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    3.7180s for    90112 events => throughput is 2.42E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    5.4042s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5693s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    3.8337s for    90112 events => throughput is 2.35E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.517328e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.438451e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.477316e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.447901e+04                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.5207s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3399s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1801s for     8192 events => throughput is 4.55E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.5127s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3301s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1819s for     8192 events => throughput is 4.50E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    3.4936s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5370s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.9559s for    90112 events => throughput is 4.61E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    3.5729s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5688s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.0033s for    90112 events => throughput is 4.50E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.723167e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.534029e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.710741e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.539640e+04                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4289s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3383s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0899s for     8192 events => throughput is 9.11E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4296s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3368s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0922s for     8192 events => throughput is 8.89E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.5415s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5644s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9765s for    90112 events => throughput is 9.23E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.5686s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5656s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0024s for    90112 events => throughput is 8.99E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.063994e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.162844e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.113779e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.203130e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,10 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4521s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3684s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0831s for     8192 events => throughput is 9.86E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [COUNTERS] PROGRAM TOTAL          :    0.4127s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3310s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0811s for     8192 events => throughput is 1.01E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -401,9 +401,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4440s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5615s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8818s for    90112 events => throughput is 1.02E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.4702s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5733s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8963s for    90112 events => throughput is 1.01E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.056563e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.036026e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.066565e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.036765e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4498s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3385s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1106s for     8192 events => throughput is 7.41E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4471s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3305s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1158s for     8192 events => throughput is 7.07E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.7606s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5479s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2119s for    90112 events => throughput is 7.44E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.8416s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5733s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2675s for    90112 events => throughput is 7.11E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.524660e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.157314e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.502357e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.179221e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8444s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8355s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0059s for     8192 events => throughput is 1.38E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7661s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7572s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0060s for     8192 events => throughput is 1.36E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    1.9827s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9565s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0233s for    90112 events => throughput is 3.86E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0028s
+ [COUNTERS] PROGRAM TOTAL          :    2.0255s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9988s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0237s for    90112 events => throughput is 3.80E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.637288e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.629113e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.243124e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.206213e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.002014e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.922280e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.239487e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.239743e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.002136e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.941872e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.250655e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.249673e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.001900e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.946587e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.746731e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.755614e+06                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
index b25cff31e4..7af52fe973 100644
--- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
@@ -1,9 +1,9 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 
-make USEBUILDDIR=1 BACKEND=cuda
 
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cuda
+make USEBUILDDIR=1 BACKEND=cppnone
 
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:45:43
+DATE: 2024-08-30_00:54:12
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
  [UNWEIGHT] Wrote 365 events (found 1496 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6879s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3658s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3221s for     8192 events => throughput is 2.54E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6937s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3614s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3324s for     8192 events => throughput is 2.46E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6575s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3322s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3252s for     8192 events => throughput is 2.52E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6601s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3286s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3315s for     8192 events => throughput is 2.47E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.0903s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5245s
- [COUNTERS] Fortran MEs      ( 1 ) :    3.5658s for    90112 events => throughput is 2.53E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    5.2320s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5618s
+ [COUNTERS] Fortran MEs      ( 1 ) :    3.6702s for    90112 events => throughput is 2.46E+04 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112722616246457] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6630s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3346s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3273s for     8192 events => throughput is 2.50E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6650s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3303s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3336s for     8192 events => throughput is 2.46E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238468293717765E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.1318s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5454s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    3.5854s for    90112 events => throughput is 2.51E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    5.2417s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5684s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    3.6724s for    90112 events => throughput is 2.45E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.562809e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.532359e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.549301e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.532906e+04                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112720694019242] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4414s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3412s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0997s for     8192 events => throughput is 8.22E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [COUNTERS] PROGRAM TOTAL          :    0.4525s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3399s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1120s for     8192 events => throughput is 7.31E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238454783817719E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6571s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5548s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1018s for    90112 events => throughput is 8.18E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [COUNTERS] PROGRAM TOTAL          :    2.6896s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5677s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1213s for    90112 events => throughput is 8.04E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.333170e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.197742e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.397937e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.182250e+04                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3825s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3366s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0455s for     8192 events => throughput is 1.80E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3765s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3295s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0466s for     8192 events => throughput is 1.76E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0649s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5567s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5077s for    90112 events => throughput is 1.77E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    2.0876s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5731s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5140s for    90112 events => throughput is 1.75E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.821951e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.789700e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.834362e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.781504e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3803s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3381s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0418s for     8192 events => throughput is 1.96E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3742s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3309s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0429s for     8192 events => throughput is 1.91E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0303s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5712s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4587s for    90112 events => throughput is 1.96E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.0350s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5670s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4675s for    90112 events => throughput is 1.93E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.018262e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.973486e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.019326e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.975490e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,10 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112723389095883] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3929s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3375s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0549s for     8192 events => throughput is 1.49E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [COUNTERS] PROGRAM TOTAL          :    0.3896s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3323s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0567s for     8192 events => throughput is 1.44E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -479,10 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238464413054557E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.1189s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5295s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5889s for    90112 events => throughput is 1.53E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [COUNTERS] PROGRAM TOTAL          :    2.1882s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5687s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6189s for    90112 events => throughput is 1.46E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.561264e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.454568e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.545662e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.469296e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112725654777677] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7590s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7568s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0010s for     8192 events => throughput is 8.12E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7619s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7598s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0010s for     8192 events => throughput is 8.23E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238470908598507E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    1.9627s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9510s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0105s for    90112 events => throughput is 8.59E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
+ [COUNTERS] PROGRAM TOTAL          :    2.0010s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9891s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0107s for    90112 events => throughput is 8.40E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.151184e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.172109e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.548948e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.550537e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.576425e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.551280e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.715469e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.686947e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.585156e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.549100e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.753005e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.728979e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.440113e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.413968e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.293588e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.281798e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
index b6592dfe65..17f42d4ffa 100644
--- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:46:20
+DATE: 2024-08-30_00:54:50
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
  [UNWEIGHT] Wrote 365 events (found 1496 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6929s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3702s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3227s for     8192 events => throughput is 2.54E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6937s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3622s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3315s for     8192 events => throughput is 2.47E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6641s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3385s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3256s for     8192 events => throughput is 2.52E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6619s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3285s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3334s for     8192 events => throughput is 2.46E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.1698s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5570s
- [COUNTERS] Fortran MEs      ( 1 ) :    3.6128s for    90112 events => throughput is 2.49E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    5.2139s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5585s
+ [COUNTERS] Fortran MEs      ( 1 ) :    3.6553s for    90112 events => throughput is 2.47E+04 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6766s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3338s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3416s for     8192 events => throughput is 2.40E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
+ [COUNTERS] PROGRAM TOTAL          :    0.6848s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3306s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3530s for     8192 events => throughput is 2.32E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.3154s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5455s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    3.7687s for    90112 events => throughput is 2.39E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    5.4616s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5794s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    3.8810s for    90112 events => throughput is 2.32E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.463950e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.409533e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.478616e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.396518e+04                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.5103s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3345s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1749s for     8192 events => throughput is 4.68E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.5127s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3310s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1808s for     8192 events => throughput is 4.53E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    3.4746s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5384s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.9354s for    90112 events => throughput is 4.66E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    3.5681s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5715s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.9958s for    90112 events => throughput is 4.52E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.832626e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.663293e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.815562e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.697752e+04                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4266s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3394s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0865s for     8192 events => throughput is 9.47E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4226s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3320s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0899s for     8192 events => throughput is 9.11E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4911s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5269s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9636s for    90112 events => throughput is 9.35E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.5611s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5711s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9894s for    90112 events => throughput is 9.11E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.435081e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.338563e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.477580e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.283232e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4142s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3362s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0774s for     8192 events => throughput is 1.06E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4112s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3315s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0791s for     8192 events => throughput is 1.04E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,10 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3905s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5342s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8556s for    90112 events => throughput is 1.05E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [COUNTERS] PROGRAM TOTAL          :    2.4528s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5718s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8803s for    90112 events => throughput is 1.02E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.087061e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.055357e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.088736e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.047729e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4463s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3356s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1100s for     8192 events => throughput is 7.45E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4511s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3308s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1195s for     8192 events => throughput is 6.86E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.7724s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5419s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2297s for    90112 events => throughput is 7.33E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.8840s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5752s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.3080s for    90112 events => throughput is 6.89E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.268797e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.986026e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.343356e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.961269e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1
  [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7682s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7592s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0060s for     8192 events => throughput is 1.36E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7685s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7595s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0060s for     8192 events => throughput is 1.37E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1
  [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    1.9875s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9612s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0233s for    90112 events => throughput is 3.86E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
+ [COUNTERS] PROGRAM TOTAL          :    2.0160s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9892s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0237s for    90112 events => throughput is 3.80E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.654166e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.643017e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.808330e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.890572e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.001990e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.862432e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.235577e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.232251e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.000218e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.923478e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.245999e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.245240e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.996930e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.915356e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.726284e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.729398e+06                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
index 9f965c04b5..72b9dd250a 100644
--- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 
-make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
-
 make USEBUILDDIR=1 BACKEND=cppsse4
+
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -18,8 +18,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
-make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:47:02
+DATE: 2024-08-30_00:55:32
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
  [UNWEIGHT] Wrote 11 events (found 187 events)
- [COUNTERS] PROGRAM TOTAL          :    4.5167s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2657s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.2511s for     8192 events => throughput is 1.93E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    4.6116s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2616s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.3500s for     8192 events => throughput is 1.88E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.4866s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2643s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.2223s for     8192 events => throughput is 1.94E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    4.5881s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2535s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.3346s for     8192 events => throughput is 1.89E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   48.4461s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8443s
- [COUNTERS] Fortran MEs      ( 1 ) :   46.6018s for    90112 events => throughput is 1.93E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :   49.6135s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8559s
+ [COUNTERS] Fortran MEs      ( 1 ) :   47.7576s for    90112 events => throughput is 1.89E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.6404s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2618s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    4.3690s for     8192 events => throughput is 1.88E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0096s
+ [COUNTERS] PROGRAM TOTAL          :    4.7506s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2515s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    4.4898s for     8192 events => throughput is 1.82E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0093s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   49.9380s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7954s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   48.1336s for    90112 events => throughput is 1.87E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0090s
+ [COUNTERS] PROGRAM TOTAL          :   51.2369s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8206s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   49.4072s for    90112 events => throughput is 1.82E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0092s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.926413e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.887013e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.935484e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.886305e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6125s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2606s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.3472s for     8192 events => throughput is 3.49E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0046s
+ [COUNTERS] PROGRAM TOTAL          :    2.6725s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2519s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4156s for     8192 events => throughput is 3.39E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0050s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083266099785] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   27.5257s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8027s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   25.7180s for    90112 events => throughput is 3.50E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0050s
+ [COUNTERS] PROGRAM TOTAL          :   28.2511s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8187s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   26.4276s for    90112 events => throughput is 3.41E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0049s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.649842e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.532551e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.636818e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.540511e+03                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,10 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.2653s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2598s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0030s for     8192 events => throughput is 8.17E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0026s
+ [COUNTERS] PROGRAM TOTAL          :    1.2920s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2501s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0393s for     8192 events => throughput is 7.88E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0025s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   12.8598s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7908s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   11.0665s for    90112 events => throughput is 8.14E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0025s
+ [COUNTERS] PROGRAM TOTAL          :   13.2584s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8186s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   11.4371s for    90112 events => throughput is 7.88E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0027s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.344831e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.101118e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.416676e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.086189e+03                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.1673s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2599s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9051s for     8192 events => throughput is 9.05E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.1797s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2518s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9256s for     8192 events => throughput is 8.85E+03 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0023s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,10 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   11.7872s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8132s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    9.9717s for    90112 events => throughput is 9.04E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0022s
+ [COUNTERS] PROGRAM TOTAL          :   12.0258s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8174s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   10.2060s for    90112 events => throughput is 8.83E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0024s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.472083e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.177055e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.534343e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.175263e+03                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3936s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2589s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1317s for     8192 events => throughput is 7.24E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4405s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2500s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1875s for     8192 events => throughput is 6.90E+03 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,10 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   14.2691s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8171s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   12.4493s for    90112 events => throughput is 7.24E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0027s
+ [COUNTERS] PROGRAM TOTAL          :   14.9495s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8217s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   13.1249s for    90112 events => throughput is 6.87E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.935643e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.981801e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.348983e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.965066e+03                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222225] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7693s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6983s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0363s for     8192 events => throughput is 2.26E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0347s
+ [COUNTERS] PROGRAM TOTAL          :    0.7575s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6866s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0363s for     8192 events => throughput is 2.25E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0346s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083266099782] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6062s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.2048s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3669s for    90112 events => throughput is 2.46E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0344s
+ [COUNTERS] PROGRAM TOTAL          :    2.6470s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.2462s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3664s for    90112 events => throughput is 2.46E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0345s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.290486e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.276910e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.506388e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.501526e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.134196e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.128345e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.177921e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.149669e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.129278e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.128670e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.155764e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.167581e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.126990e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.106037e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.446377e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.450094e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
index cd633f37c7..1eb1b6bf73 100644
--- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
+
+
 make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
-
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -14,15 +14,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:51:00
+DATE: 2024-08-30_00:59:36
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
  [UNWEIGHT] Wrote 11 events (found 187 events)
- [COUNTERS] PROGRAM TOTAL          :    4.4959s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2635s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.2323s for     8192 events => throughput is 1.94E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    4.6113s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2567s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.3546s for     8192 events => throughput is 1.88E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.4788s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2631s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.2156s for     8192 events => throughput is 1.94E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    4.5970s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2531s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.3439s for     8192 events => throughput is 1.89E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   48.4352s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8357s
- [COUNTERS] Fortran MEs      ( 1 ) :   46.5995s for    90112 events => throughput is 1.93E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :   49.6113s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8558s
+ [COUNTERS] Fortran MEs      ( 1 ) :   47.7555s for    90112 events => throughput is 1.89E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320716615478996] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.5354s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2660s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    4.2605s for     8192 events => throughput is 1.92E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0089s
+ [COUNTERS] PROGRAM TOTAL          :    4.6095s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2504s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    4.3503s for     8192 events => throughput is 1.88E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0087s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558162567940870] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   48.5468s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7982s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   46.7401s for    90112 events => throughput is 1.93E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0085s
+ [COUNTERS] PROGRAM TOTAL          :   49.8390s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8197s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   48.0105s for    90112 events => throughput is 1.88E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0088s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.996945e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.939353e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.982014e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.936979e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320708851010073] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4573s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2634s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1914s for     8192 events => throughput is 6.88E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0025s
+ [COUNTERS] PROGRAM TOTAL          :    1.4544s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2505s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2012s for     8192 events => throughput is 6.82E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0026s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558157380141428] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   14.6570s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7854s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   12.8693s for    90112 events => throughput is 7.00E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0024s
+ [COUNTERS] PROGRAM TOTAL          :   15.0990s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8254s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   13.2710s for    90112 events => throughput is 6.79E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0026s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.255598e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.986380e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.246435e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.983775e+03                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7739s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2587s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5137s for     8192 events => throughput is 1.59E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7803s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2500s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5288s for     8192 events => throughput is 1.55E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    7.4672s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7991s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    5.6666s for    90112 events => throughput is 1.59E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0014s
+ [COUNTERS] PROGRAM TOTAL          :    7.6717s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8162s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.8540s for    90112 events => throughput is 1.54E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.606140e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.588565e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.576957e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.580506e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,10 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7680s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2709s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4957s for     8192 events => throughput is 1.65E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0013s
+ [COUNTERS] PROGRAM TOTAL          :    0.7194s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2503s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4678s for     8192 events => throughput is 1.75E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0014s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -401,10 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    6.7809s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7804s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    4.9992s for    90112 events => throughput is 1.80E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0014s
+ [COUNTERS] PROGRAM TOTAL          :    6.9505s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8159s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.1333s for    90112 events => throughput is 1.76E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0013s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.849666e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.792140e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.858554e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.802320e+04                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320713685871445] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8187s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2599s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5571s for     8192 events => throughput is 1.47E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8450s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2549s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5884s for     8192 events => throughput is 1.39E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,10 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558162184774774] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    7.9104s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7899s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    6.1190s for    90112 events => throughput is 1.47E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
+ [COUNTERS] PROGRAM TOTAL          :    8.3082s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8257s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    6.4808s for    90112 events => throughput is 1.39E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.496224e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.385160e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.504281e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.403555e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320719394836651] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7396s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6908s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0246s for     8192 events => throughput is 3.32E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0242s
+ [COUNTERS] PROGRAM TOTAL          :    0.7335s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6847s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0246s for     8192 events => throughput is 3.34E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0243s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558167135091578] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4680s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.1917s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2521s for    90112 events => throughput is 3.57E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0241s
+ [COUNTERS] PROGRAM TOTAL          :    2.5288s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.2506s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2539s for    90112 events => throughput is 3.55E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0243s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.382988e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.368889e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.717142e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.732230e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.139748e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.130776e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.304954e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.306866e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.085623e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.087874e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.300454e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.214851e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.130448e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.074217e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.397157e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.388796e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
index 27512be658..f01e005e58 100644
--- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
@@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g
 
 make USEBUILDDIR=1 BACKEND=cuda
 
-
 make USEBUILDDIR=1 BACKEND=cppnone
+
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:54:10
+DATE: 2024-08-30_01:02:50
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
  [UNWEIGHT] Wrote 11 events (found 187 events)
- [COUNTERS] PROGRAM TOTAL          :    4.4700s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2619s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.2081s for     8192 events => throughput is 1.95E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    4.5981s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2571s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.3410s for     8192 events => throughput is 1.89E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.4683s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2604s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.2079s for     8192 events => throughput is 1.95E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    4.5967s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2533s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.3434s for     8192 events => throughput is 1.89E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   48.3196s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8366s
- [COUNTERS] Fortran MEs      ( 1 ) :   46.4830s for    90112 events => throughput is 1.94E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :   49.6248s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8598s
+ [COUNTERS] Fortran MEs      ( 1 ) :   47.7649s for    90112 events => throughput is 1.89E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556893412546] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.6760s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2586s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    4.4088s for     8192 events => throughput is 1.86E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0086s
+ [COUNTERS] PROGRAM TOTAL          :    4.7999s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2518s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    4.5389s for     8192 events => throughput is 1.80E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0092s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083370546855] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   50.5724s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8031s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   48.7604s for    90112 events => throughput is 1.85E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0089s
+ [COUNTERS] PROGRAM TOTAL          :   51.9774s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8235s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   50.1446s for    90112 events => throughput is 1.80E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0094s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.909521e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.855335e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.899981e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.862802e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556780656974] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    2.5687s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2576s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.3063s for     8192 events => throughput is 3.55E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0048s
+ [COUNTERS] PROGRAM TOTAL          :    2.6423s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2504s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.3868s for     8192 events => throughput is 3.43E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0051s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083390630859] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   27.4318s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7915s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   25.6356s for    90112 events => throughput is 3.52E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0047s
+ [COUNTERS] PROGRAM TOTAL          :   28.5409s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8402s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   26.6956s for    90112 events => throughput is 3.38E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0051s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.646364e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.436169e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.634455e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.491615e+03                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,10 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.2686s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2604s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0056s for     8192 events => throughput is 8.15E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0025s
+ [COUNTERS] PROGRAM TOTAL          :    1.2920s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2524s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0372s for     8192 events => throughput is 7.90E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0024s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   12.9032s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7920s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   11.1088s for    90112 events => throughput is 8.11E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0024s
+ [COUNTERS] PROGRAM TOTAL          :   13.2628s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8156s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   11.4447s for    90112 events => throughput is 7.87E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0025s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.153831e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.072952e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.410165e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.014073e+03                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.1480s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2607s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8850s for     8192 events => throughput is 9.26E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.1675s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2513s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9138s for     8192 events => throughput is 8.97E+03 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0023s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,10 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   11.5478s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7830s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    9.7625s for    90112 events => throughput is 9.23E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0024s
+ [COUNTERS] PROGRAM TOTAL          :   11.8988s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8240s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   10.0725s for    90112 events => throughput is 8.95E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0023s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.509937e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.224504e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.503575e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.269645e+03                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3881s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2592s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1259s for     8192 events => throughput is 7.28E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4482s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2514s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1937s for     8192 events => throughput is 6.86E+03 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0031s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,10 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   14.4378s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7995s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   12.6355s for    90112 events => throughput is 7.13E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0028s
+ [COUNTERS] PROGRAM TOTAL          :   15.0050s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8226s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   13.1794s for    90112 events => throughput is 6.84E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.378664e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.918159e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.252552e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.921301e+03                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.4632 [0.46320556665261842] fbridge_mode=1
  [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7612s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6909s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0360s for     8192 events => throughput is 2.27E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0343s
+ [COUNTERS] PROGRAM TOTAL          :    0.7570s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6865s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0361s for     8192 events => throughput is 2.27E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0344s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_
  [XSECTION] ChannelId = 112
  [XSECTION] Cross section = 0.2256 [0.22558083224243403] fbridge_mode=1
  [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    2.5943s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.1940s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3660s for    90112 events => throughput is 2.46E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0343s
+ [COUNTERS] PROGRAM TOTAL          :    2.6619s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.2612s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3664s for    90112 events => throughput is 2.46E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0344s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.292672e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.283925e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.513091e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.521176e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.132768e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.129933e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.151465e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.186234e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.134281e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.133957e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.177596e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.181867e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.130147e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.130752e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.451952e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.449188e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
index 3147b869f0..744ad57a47 100644
--- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 
-make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
-make USEBUILDDIR=1 BACKEND=cppsse4
 
+make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-29_00:29:47
+DATE: 2024-08-30_01:08:23
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
  [UNWEIGHT] Wrote 1 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  104.7057s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5086s
- [COUNTERS] Fortran MEs      ( 1 ) :  104.1971s for     8192 events => throughput is 7.86E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          :  103.8119s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5133s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.2986s for     8192 events => throughput is 7.93E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  103.9667s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5128s
- [COUNTERS] Fortran MEs      ( 1 ) :  103.4539s for     8192 events => throughput is 7.92E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          :  103.9562s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5117s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.4445s for     8192 events => throughput is 7.92E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1143.5314s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4515s
- [COUNTERS] Fortran MEs      ( 1 ) : 1139.0798s for    90112 events => throughput is 7.91E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          : 1141.2639s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4413s
+ [COUNTERS] Fortran MEs      ( 1 ) : 1136.8226s for    90112 events => throughput is 7.93E+01 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  126.8964s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5197s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  126.1682s for     8192 events => throughput is 6.49E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2085s
+ [COUNTERS] PROGRAM TOTAL          :  124.5510s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5196s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  123.8327s for     8192 events => throughput is 6.62E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1987s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1403.2870s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5004s
- [COUNTERS] CudaCpp MEs      ( 2 ) : 1398.5804s for    90112 events => throughput is 6.44E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2061s
+ [COUNTERS] PROGRAM TOTAL          : 1400.6576s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5060s
+ [COUNTERS] CudaCpp MEs      ( 2 ) : 1395.9552s for    90112 events => throughput is 6.46E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1964s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.651833e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.057551e+01                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.593965e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.753971e+01                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   62.5955s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5183s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   61.9733s for     8192 events => throughput is 1.32E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1039s
+ [COUNTERS] PROGRAM TOTAL          :   62.4647s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5243s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   61.8356s for     8192 events => throughput is 1.32E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1048s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  682.9443s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5013s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  678.3401s for    90112 events => throughput is 1.33E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1029s
+ [COUNTERS] PROGRAM TOTAL          :  687.6948s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5179s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  683.0742s for    90112 events => throughput is 1.32E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1026s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.542499e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.563733e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.547949e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.550612e+02                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,10 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   29.4732s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5195s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   28.9048s for     8192 events => throughput is 2.83E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0489s
+ [COUNTERS] PROGRAM TOTAL          :   29.7924s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5177s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   29.2260s for     8192 events => throughput is 2.80E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0487s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  326.1256s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4980s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  321.5780s for    90112 events => throughput is 2.80E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0495s
+ [COUNTERS] PROGRAM TOTAL          :  324.2440s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4975s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  319.6986s for    90112 events => throughput is 2.82E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0479s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.348554e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.360978e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.339258e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.361478e+02                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,10 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   26.4013s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5189s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   25.8409s for     8192 events => throughput is 3.17E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0416s
+ [COUNTERS] PROGRAM TOTAL          :   26.2312s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5190s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   25.6700s for     8192 events => throughput is 3.19E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0423s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -401,10 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  288.3304s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4909s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  283.7968s for    90112 events => throughput is 3.18E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0426s
+ [COUNTERS] PROGRAM TOTAL          :  286.1989s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4892s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  281.6678s for    90112 events => throughput is 3.20E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0420s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.889567e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.875186e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.856009e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.884361e+02                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,10 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   26.2781s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5185s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   25.7127s for     8192 events => throughput is 3.19E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0470s
+ [COUNTERS] PROGRAM TOTAL          :   26.4167s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5180s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   25.8509s for     8192 events => throughput is 3.17E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0478s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -479,10 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  288.1563s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5041s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  283.6047s for    90112 events => throughput is 3.18E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0475s
+ [COUNTERS] PROGRAM TOTAL          :  288.3192s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5257s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  283.7442s for    90112 events => throughput is 3.18E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0493s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.430830e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.383791e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.419526e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.435116e+02                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :    3.2059s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.0215s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0984s for     8192 events => throughput is 7.46E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0860s
+ [COUNTERS] PROGRAM TOTAL          :    3.1981s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.0109s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0998s for     8192 events => throughput is 7.45E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0874s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :   18.0242s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.9891s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   11.9530s for    90112 events => throughput is 7.54E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0821s
+ [COUNTERS] PROGRAM TOTAL          :   17.9918s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.9757s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   11.9277s for    90112 events => throughput is 7.55E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0883s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.504621e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.501572e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.292840e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.301580e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.223985e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.262889e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.586185e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.584992e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.274801e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.233679e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.422438e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.452839e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.222510e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.249444e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.234675e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.235776e+03                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
index 4ffdbee10a..d6f64f4919 100644
--- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
@@ -1,7 +1,7 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 
-make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
 
@@ -14,8 +14,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_22:23:03
+DATE: 2024-08-30_02:33:47
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
  [UNWEIGHT] Wrote 1 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  101.3873s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5075s
- [COUNTERS] Fortran MEs      ( 1 ) :  100.8798s for     8192 events => throughput is 8.12E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          :  103.7942s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5071s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.2871s for     8192 events => throughput is 7.93E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  102.2416s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5135s
- [COUNTERS] Fortran MEs      ( 1 ) :  101.7281s for     8192 events => throughput is 8.05E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          :  103.8381s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5120s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.3261s for     8192 events => throughput is 7.93E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1114.7300s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.3428s
- [COUNTERS] Fortran MEs      ( 1 ) : 1110.3872s for    90112 events => throughput is 8.12E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          : 1145.7081s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4432s
+ [COUNTERS] Fortran MEs      ( 1 ) : 1141.2649s for    90112 events => throughput is 7.90E+01 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -134,10 +134,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.241e-06 [1.2405719945779552E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  111.0089s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5100s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  110.3187s for     8192 events => throughput is 7.43E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1802s
+ [COUNTERS] PROGRAM TOTAL          :  115.2915s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5177s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  114.5848s for     8192 events => throughput is 7.15E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1889s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -169,10 +169,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.333e-07 [2.3326290777570335E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1216.8479s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4035s
- [COUNTERS] CudaCpp MEs      ( 2 ) : 1212.2644s for    90112 events => throughput is 7.43E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1800s
+ [COUNTERS] PROGRAM TOTAL          : 1259.1659s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5060s
+ [COUNTERS] CudaCpp MEs      ( 2 ) : 1254.4725s for    90112 events => throughput is 7.18E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1874s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.795452e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.521351e+01                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.783118e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.544662e+01                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -214,10 +214,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.241e-06 [1.2405716994349971E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   27.4750s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5164s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   26.9120s for     8192 events => throughput is 3.04E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0465s
+ [COUNTERS] PROGRAM TOTAL          :   28.4388s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5228s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   27.8678s for     8192 events => throughput is 2.94E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0481s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -249,10 +249,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.333e-07 [2.3326284885505778E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  300.8248s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4082s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  296.3700s for    90112 events => throughput is 3.04E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0466s
+ [COUNTERS] PROGRAM TOTAL          :  310.3944s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5057s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  305.8410s for    90112 events => throughput is 2.95E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0477s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.485944e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.377580e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.470723e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.375822e+02                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -294,10 +294,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   14.5936s
+ [COUNTERS] PROGRAM TOTAL          :   15.0768s
  [COUNTERS] Fortran Overhead ( 0 ) :    0.5183s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   14.0522s for     8192 events => throughput is 5.83E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0231s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   14.5341s for     8192 events => throughput is 5.64E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0245s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -329,10 +329,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  158.5014s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4348s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  154.0430s for    90112 events => throughput is 5.85E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0236s
+ [COUNTERS] PROGRAM TOTAL          :  164.6278s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4988s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  160.1046s for    90112 events => throughput is 5.63E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0245s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.991558e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.752482e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.952358e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.781633e+02                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -374,10 +374,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   12.8606s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5199s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   12.3203s for     8192 events => throughput is 6.65E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0204s
+ [COUNTERS] PROGRAM TOTAL          :   13.2735s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5198s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   12.7324s for     8192 events => throughput is 6.43E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0213s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -409,10 +409,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  139.5398s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.3981s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  135.1212s for    90112 events => throughput is 6.67E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0205s
+ [COUNTERS] PROGRAM TOTAL          :  144.3691s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5029s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  139.8451s for    90112 events => throughput is 6.44E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0211s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.890802e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.750549e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.069181e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.735218e+02                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -454,10 +454,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.241e-06 [1.2405719257109645E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   12.8130s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5166s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   12.2739s for     8192 events => throughput is 6.67E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0225s
+ [COUNTERS] PROGRAM TOTAL          :   13.3695s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5197s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   12.8251s for     8192 events => throughput is 6.39E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0247s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -489,10 +489,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.333e-07 [2.3326283665697276E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  139.5916s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4260s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  135.1428s for    90112 events => throughput is 6.67E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0228s
+ [COUNTERS] PROGRAM TOTAL          :  145.6119s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5195s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  141.0684s for    90112 events => throughput is 6.39E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0239s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.223008e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.855652e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.135239e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.856623e+02                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -533,10 +533,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.241e-06 [1.2405721007137020E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :    2.1089s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.0215s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5332s for     8192 events => throughput is 1.54E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.5542s
+ [COUNTERS] PROGRAM TOTAL          :    2.1096s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.0064s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5405s for     8192 events => throughput is 1.52E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.5628s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -567,10 +567,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.333e-07 [2.3326295421688232E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :   11.2844s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.8851s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    5.8421s for    90112 events => throughput is 1.54E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.5572s
+ [COUNTERS] PROGRAM TOTAL          :   11.4101s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.9867s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.8619s for    90112 events => throughput is 1.54E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.5616s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -583,42 +583,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.533878e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.546914e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.547825e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.545489e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.147653e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.133242e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.124611e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.150716e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.134315e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.155991e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.131039e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.210272e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.139642e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.130976e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.021489e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.988585e+03                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
index e8248fddca..5bfdf2922a 100644
--- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
+
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_23:26:17
+DATE: 2024-08-30_03:39:02
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
  [UNWEIGHT] Wrote 1 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  103.0122s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5139s
- [COUNTERS] Fortran MEs      ( 1 ) :  102.4983s for     8192 events => throughput is 7.99E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          :  104.0089s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5087s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.5002s for     8192 events => throughput is 7.91E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  101.2993s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5294s
- [COUNTERS] Fortran MEs      ( 1 ) :  100.7699s for     8192 events => throughput is 8.13E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          :  104.2447s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5161s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.7286s for     8192 events => throughput is 7.90E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1118.7642s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.3619s
- [COUNTERS] Fortran MEs      ( 1 ) : 1114.4022s for    90112 events => throughput is 8.09E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          : 1141.3180s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4408s
+ [COUNTERS] Fortran MEs      ( 1 ) : 1136.8772s for    90112 events => throughput is 7.93E+01 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  125.7885s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5193s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  125.0621s for     8192 events => throughput is 6.55E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2071s
+ [COUNTERS] PROGRAM TOTAL          :  126.1475s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5218s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  125.4139s for     8192 events => throughput is 6.53E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2118s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1322.8827s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.3903s
- [COUNTERS] CudaCpp MEs      ( 2 ) : 1318.2870s for    90112 events => throughput is 6.84E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2054s
+ [COUNTERS] PROGRAM TOTAL          : 1383.7655s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5039s
+ [COUNTERS] CudaCpp MEs      ( 2 ) : 1379.0500s for    90112 events => throughput is 6.53E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2115s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.761597e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.539167e+01                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.724704e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.538387e+01                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   62.4510s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5155s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   61.8333s for     8192 events => throughput is 1.32E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1022s
+ [COUNTERS] PROGRAM TOTAL          :   65.2362s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5183s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   64.6135s for     8192 events => throughput is 1.27E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1044s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  684.8121s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4198s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  680.2921s for    90112 events => throughput is 1.32E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1003s
+ [COUNTERS] PROGRAM TOTAL          :  720.6180s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5092s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  716.0059s for    90112 events => throughput is 1.26E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1030s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.589042e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.544939e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.588931e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.546626e+02                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,10 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   27.0092s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5181s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   26.4459s for     8192 events => throughput is 3.10E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0452s
+ [COUNTERS] PROGRAM TOTAL          :   28.2487s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5164s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   27.6865s for     8192 events => throughput is 2.96E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0458s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  298.0409s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4173s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  293.5790s for    90112 events => throughput is 3.07E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0445s
+ [COUNTERS] PROGRAM TOTAL          :  306.6356s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4981s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  302.0914s for    90112 events => throughput is 2.98E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0461s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.648206e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.532662e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.625373e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.529106e+02                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,10 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   24.3540s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5168s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   23.7936s for     8192 events => throughput is 3.44E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0436s
+ [COUNTERS] PROGRAM TOTAL          :   24.9404s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5184s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   24.3827s for     8192 events => throughput is 3.36E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0393s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -401,10 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  269.6777s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4164s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  265.2234s for    90112 events => throughput is 3.40E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0378s
+ [COUNTERS] PROGRAM TOTAL          :  273.3928s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4984s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  268.8528s for    90112 events => throughput is 3.35E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0417s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.285493e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.133789e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.289545e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.144899e+02                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,10 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   25.1227s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5145s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   24.5642s for     8192 events => throughput is 3.33E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0441s
+ [COUNTERS] PROGRAM TOTAL          :   25.8510s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5190s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   25.2849s for     8192 events => throughput is 3.24E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0471s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -479,10 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  274.1583s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4200s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  269.6946s for    90112 events => throughput is 3.34E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0436s
+ [COUNTERS] PROGRAM TOTAL          :  284.5120s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5136s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  279.9519s for    90112 events => throughput is 3.22E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0466s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.625912e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.471913e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.662510e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.462958e+02                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :    2.7717s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.0261s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8763s for     8192 events => throughput is 9.35E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.8694s
+ [COUNTERS] PROGRAM TOTAL          :    2.7567s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.0084s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8770s for     8192 events => throughput is 9.34E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.8713s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :   15.2659s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.8943s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    9.5013s for    90112 events => throughput is 9.48E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.8704s
+ [COUNTERS] PROGRAM TOTAL          :   15.3678s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.9924s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    9.5033s for    90112 events => throughput is 9.48E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.8721s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.434661e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.459517e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.089765e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.088954e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.112116e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.109677e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.160890e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.157759e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.108390e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.112863e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.111312e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.110468e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.109990e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.115265e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.638783e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.668124e+03                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
index b877c26fea..d5d9f39a76 100644
--- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
@@ -1,9 +1,9 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu
 
-make USEBUILDDIR=1 BACKEND=cuda
 
 
 
+make USEBUILDDIR=1 BACKEND=cuda
 make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:58:09
+DATE: 2024-08-30_01:06:56
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
  [UNWEIGHT] Wrote 404 events (found 1817 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4754s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.4051s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0703s for     8192 events => throughput is 1.16E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4844s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4104s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0740s for     8192 events => throughput is 1.11E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4153s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3445s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0708s for     8192 events => throughput is 1.16E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4098s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3373s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0726s for     8192 events => throughput is 1.13E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3303s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5573s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.7730s for    90112 events => throughput is 1.17E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.3919s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5957s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.7962s for    90112 events => throughput is 1.13E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4189s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3418s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0764s for     8192 events => throughput is 1.07E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4172s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3383s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0781s for     8192 events => throughput is 1.05E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3766s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5374s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8384s for    90112 events => throughput is 1.07E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.4327s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5686s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8633s for    90112 events => throughput is 1.04E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.104999e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.067668e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.080050e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.067401e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351262541] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3875s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3450s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0419s for     8192 events => throughput is 1.96E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3804s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3364s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0434s for     8192 events => throughput is 1.89E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0024s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5394s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4624s for    90112 events => throughput is 1.95E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.0534s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5637s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4891s for    90112 events => throughput is 1.84E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.937885e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.925397e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.972484e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.917741e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3673s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3427s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0240s for     8192 events => throughput is 3.41E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3636s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3380s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0250s for     8192 events => throughput is 3.28E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8108s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5445s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2657s for    90112 events => throughput is 3.39E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.8426s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5659s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2761s for    90112 events => throughput is 3.26E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.384861e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.279688e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.378583e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.324148e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,10 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3684s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3456s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0222s for     8192 events => throughput is 3.69E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [COUNTERS] PROGRAM TOTAL          :    0.3593s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3360s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0227s for     8192 events => throughput is 3.60E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -401,9 +401,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7798s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5417s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2375s for    90112 events => throughput is 3.79E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.8246s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5753s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2488s for    90112 events => throughput is 3.62E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.465878e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.646317e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.626688e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.693076e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3809s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3477s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0325s for     8192 events => throughput is 2.52E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3735s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3382s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0346s for     8192 events => throughput is 2.37E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,10 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8986s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5431s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3549s for    90112 events => throughput is 2.54E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [COUNTERS] PROGRAM TOTAL          :    1.9409s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5647s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3756s for    90112 events => throughput is 2.40E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.412835e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.391806e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.491870e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.360348e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7705s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7685s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0008s for     8192 events => throughput is 1.03E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7677s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7657s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0008s for     8192 events => throughput is 1.08E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.9737s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9648s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0078s for    90112 events => throughput is 1.15E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.0026s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9932s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0081s for    90112 events => throughput is 1.11E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.555983e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.566081e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.037158e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.981957e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.629928e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.577541e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.566255e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.550054e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.636845e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.565709e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.850724e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.833768e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.619360e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.582119e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.790736e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.784852e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
index 8ac388b886..7746e8ccdc 100644
--- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
@@ -13,17 +13,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Nothing to be done for 'all'.
-make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
+make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:58:38
+DATE: 2024-08-30_01:07:25
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
  [UNWEIGHT] Wrote 404 events (found 1817 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4756s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.4044s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0711s for     8192 events => throughput is 1.15E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4734s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4014s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0720s for     8192 events => throughput is 1.14E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4108s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3420s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0688s for     8192 events => throughput is 1.19E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4152s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3416s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0736s for     8192 events => throughput is 1.11E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3245s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5525s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.7719s for    90112 events => throughput is 1.17E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.3818s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5842s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.7976s for    90112 events => throughput is 1.13E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110463158198617] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4137s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3419s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0712s for     8192 events => throughput is 1.15E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [COUNTERS] PROGRAM TOTAL          :    0.4111s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3367s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0736s for     8192 events => throughput is 1.11E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686347932190] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3233s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5375s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.7851s for    90112 events => throughput is 1.15E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.3828s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5705s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8117s for    90112 events => throughput is 1.11E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.154270e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.111352e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.117776e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.122906e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110459183868807] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3703s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3439s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0260s for     8192 events => throughput is 3.15E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3640s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3367s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0269s for     8192 events => throughput is 3.05E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510683073685827] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8197s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5348s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2844s for    90112 events => throughput is 3.17E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    1.8573s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5610s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2959s for    90112 events => throughput is 3.05E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.998738e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.037364e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.994620e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.973321e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3581s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3447s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0130s for     8192 events => throughput is 6.29E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3493s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3354s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0135s for     8192 events => throughput is 6.08E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6873s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5442s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1427s for    90112 events => throughput is 6.31E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7115s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5638s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1473s for    90112 events => throughput is 6.12E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.110364e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.189171e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.231132e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.287593e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3551s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3423s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0124s for     8192 events => throughput is 6.61E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3474s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3347s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0123s for     8192 events => throughput is 6.66E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6706s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5390s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1312s for    90112 events => throughput is 6.87E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.6980s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5613s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1363s for    90112 events => throughput is 6.61E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.737889e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.705294e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.863785e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.751048e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110464220032526] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3592s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3420s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0167s for     8192 events => throughput is 4.91E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3554s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3380s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0168s for     8192 events => throughput is 4.87E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510685471570221] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7199s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5400s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1795s for    90112 events => throughput is 5.02E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7520s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5628s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1886s for    90112 events => throughput is 4.78E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.872478e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.699105e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.938459e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.760510e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110477321990667] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7679s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7663s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.31E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7659s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7644s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.33E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510689318513457] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.9690s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9617s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0063s for    90112 events => throughput is 1.43E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
+ [COUNTERS] PROGRAM TOTAL          :    2.0120s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0043s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0068s for    90112 events => throughput is 1.33E+07 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.567743e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.646157e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.424411e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.427430e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.006580e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.058841e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.460162e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.444069e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.113271e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.942937e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.506902e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.493709e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.545880e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.477775e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.393633e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.312163e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
index 25661e1063..cac7bc2d3a 100644
--- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
@@ -1,19 +1,19 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu
 
-
 make USEBUILDDIR=1 BACKEND=cuda
-make USEBUILDDIR=1 BACKEND=cppnone
 
 
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make USEBUILDDIR=1 BACKEND=cppsse4
+
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-08_20:59:06
+DATE: 2024-08-30_01:07:54
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
  [UNWEIGHT] Wrote 404 events (found 1817 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4768s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.4060s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0709s for     8192 events => throughput is 1.16E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4748s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4019s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0728s for     8192 events => throughput is 1.12E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4179s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3473s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0706s for     8192 events => throughput is 1.16E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4102s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3379s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0723s for     8192 events => throughput is 1.13E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3258s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5517s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.7741s for    90112 events => throughput is 1.16E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.3880s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5917s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.7964s for    90112 events => throughput is 1.13E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539350666329] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4207s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3437s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0763s for     8192 events => throughput is 1.07E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4162s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3378s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0776s for     8192 events => throughput is 1.06E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,10 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686560103207] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3663s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5373s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8282s for    90112 events => throughput is 1.09E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [COUNTERS] PROGRAM TOTAL          :    2.4355s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5736s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8611s for    90112 events => throughput is 1.05E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.091070e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.055368e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.097593e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.053968e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539350666335] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3890s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3472s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0412s for     8192 events => throughput is 1.99E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3844s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3408s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0429s for     8192 events => throughput is 1.91E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686560103204] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.9944s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5398s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4540s for    90112 events => throughput is 1.98E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.0360s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5625s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4729s for    90112 events => throughput is 1.91E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.922053e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.911607e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.990970e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.920193e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3734s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3492s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0237s for     8192 events => throughput is 3.46E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3629s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3374s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0249s for     8192 events => throughput is 3.29E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8003s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5375s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2622s for    90112 events => throughput is 3.44E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.8345s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5624s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2714s for    90112 events => throughput is 3.32E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.424784e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.345273e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.455227e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.258323e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3680s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3463s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0211s for     8192 events => throughput is 3.88E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3609s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3379s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0225s for     8192 events => throughput is 3.64E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7822s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5448s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2369s for    90112 events => throughput is 3.80E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.8072s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5644s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2423s for    90112 events => throughput is 3.72E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.843024e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.708299e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.890496e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.793939e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3872s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3503s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0362s for     8192 events => throughput is 2.26E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3730s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3362s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0361s for     8192 events => throughput is 2.27E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,10 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.9147s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5452s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3689s for    90112 events => throughput is 2.44E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [COUNTERS] PROGRAM TOTAL          :    1.9604s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5708s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3889s for    90112 events => throughput is 2.32E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.300565e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.276917e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.415614e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.303186e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1
  [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7684s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7665s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0008s for     8192 events => throughput is 1.09E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7690s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7671s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0008s for     8192 events => throughput is 1.08E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1
  [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.9688s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9599s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0078s for    90112 events => throughput is 1.15E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.0035s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9941s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0082s for    90112 events => throughput is 1.10E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.565914e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.605327e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.104681e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.982731e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.636309e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.566857e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.555697e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.554656e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.642280e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.578971e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.824016e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.820298e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.612307e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.584085e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.778614e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.787764e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
index 9204db3db0..599f2d92c2 100644
--- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
@@ -1,22 +1,22 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 
-make USEBUILDDIR=1 BACKEND=cuda
 
 
+make USEBUILDDIR=1 BACKEND=cuda
 make USEBUILDDIR=1 BACKEND=cppnone
 
 make USEBUILDDIR=1 BACKEND=cppsse4
-
 make USEBUILDDIR=1 BACKEND=cppavx2
+
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:48:38
+DATE: 2024-08-30_05:04:08
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
  [UNWEIGHT] Wrote 3321 events (found 6423 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9141s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8671s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0470s for     8192 events => throughput is 1.74E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.9413s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8936s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0477s for     8192 events => throughput is 1.72E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4185s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3716s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0468s for     8192 events => throughput is 1.75E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4211s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3732s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0479s for     8192 events => throughput is 1.71E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7982s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2863s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.5119s for    90112 events => throughput is 1.76E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.8422s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3150s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.5273s for    90112 events => throughput is 1.71E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4199s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3695s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0500s for     8192 events => throughput is 1.64E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4307s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3786s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0515s for     8192 events => throughput is 1.59E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8165s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2690s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5470s for    90112 events => throughput is 1.65E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.8612s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2987s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5620s for    90112 events => throughput is 1.60E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.683813e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.636065e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.668738e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.650747e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4071s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3797s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0270s for     8192 events => throughput is 3.03E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3992s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3709s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0279s for     8192 events => throughput is 2.94E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5672s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2711s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2957s for    90112 events => throughput is 3.05E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.6028s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2911s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3113s for    90112 events => throughput is 2.89E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.037815e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.975115e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.993910e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.964788e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3883s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3715s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0164s for     8192 events => throughput is 5.00E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3885s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3708s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0172s for     8192 events => throughput is 4.76E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4641s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2801s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1835s for    90112 events => throughput is 4.91E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4811s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2942s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1864s for    90112 events => throughput is 4.83E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.902798e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.599662e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.886099e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.637890e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3876s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3719s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0152s for     8192 events => throughput is 5.38E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4631s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4466s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0161s for     8192 events => throughput is 5.09E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4216s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2567s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1645s for    90112 events => throughput is 5.48E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4597s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2887s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1706s for    90112 events => throughput is 5.28E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.361206e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.357156e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.494947e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.487847e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3960s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3733s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0222s for     8192 events => throughput is 3.68E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3943s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3702s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0235s for     8192 events => throughput is 3.48E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5023s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2627s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2391s for    90112 events => throughput is 3.77E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.5502s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2928s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2568s for    90112 events => throughput is 3.51E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.615246e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.535346e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.662708e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.536932e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7949s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7934s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.20E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8001s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7986s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.22E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7013s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.6935s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0069s for    90112 events => throughput is 1.30E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7266s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.7188s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0070s for    90112 events => throughput is 1.29E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.844829e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.985964e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.285195e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.226056e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.255268e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.193230e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.760215e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.709507e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.235451e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.177681e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.038893e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.042958e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.241445e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.170672e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.725782e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.763585e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
index ae36851550..d9149a96bc 100644
--- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
@@ -1,22 +1,22 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
-make USEBUILDDIR=1 BACKEND=cppnone
 
+
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
-make USEBUILDDIR=1 BACKEND=cppavx2
 
+make USEBUILDDIR=1 BACKEND=cppavx2
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:49:04
+DATE: 2024-08-30_05:04:35
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
  [UNWEIGHT] Wrote 3321 events (found 6423 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9394s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8922s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0473s for     8192 events => throughput is 1.73E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.9322s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8838s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0484s for     8192 events => throughput is 1.69E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4203s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3728s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0475s for     8192 events => throughput is 1.72E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4198s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3714s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0484s for     8192 events => throughput is 1.69E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7988s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2854s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.5133s for    90112 events => throughput is 1.76E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.8381s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3115s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.5266s for    90112 events => throughput is 1.71E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162897355760356] fbridge_mode=1
  [UNWEIGHT] Wrote 1620 events (found 1625 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4180s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3713s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0463s for     8192 events => throughput is 1.77E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4164s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3687s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0472s for     8192 events => throughput is 1.73E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
index d90f539fcf..93c59c43c2 100644
--- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
@@ -4,8 +4,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/h
 make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
-
 make USEBUILDDIR=1 BACKEND=cppsse4
+
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:49:10
+DATE: 2024-08-30_05:04:41
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
  [UNWEIGHT] Wrote 3321 events (found 6423 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9158s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8684s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0474s for     8192 events => throughput is 1.73E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.9477s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8996s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0481s for     8192 events => throughput is 1.70E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4209s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3739s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0470s for     8192 events => throughput is 1.74E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4203s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3730s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0473s for     8192 events => throughput is 1.73E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8008s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2889s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.5118s for    90112 events => throughput is 1.76E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.8386s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3127s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.5259s for    90112 events => throughput is 1.71E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -134,9 +134,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4229s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3736s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0488s for     8192 events => throughput is 1.68E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4234s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3719s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0509s for     8192 events => throughput is 1.61E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -169,9 +169,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8077s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2621s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5452s for    90112 events => throughput is 1.65E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.8536s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2929s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5602s for    90112 events => throughput is 1.61E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -186,13 +186,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.584312e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.543543e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.572139e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.554670e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -216,10 +216,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4000s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3717s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0278s for     8192 events => throughput is 2.94E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    0.3974s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3691s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0279s for     8192 events => throughput is 2.94E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -251,10 +251,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6068s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3000s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3063s for    90112 events => throughput is 2.94E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    1.6024s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2932s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3087s for    90112 events => throughput is 2.92E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -268,13 +268,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.801476e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.657975e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.739519e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.837138e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -298,9 +298,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4107s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3912s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0191s for     8192 events => throughput is 4.29E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3887s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3707s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0175s for     8192 events => throughput is 4.69E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -333,10 +333,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4541s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2695s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1842s for    90112 events => throughput is 4.89E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [COUNTERS] PROGRAM TOTAL          :    1.4978s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3038s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1936s for    90112 events => throughput is 4.65E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -350,13 +350,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.846731e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.667384e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.806331e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.720430e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -380,10 +380,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3903s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3744s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0155s for     8192 events => throughput is 5.29E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [COUNTERS] PROGRAM TOTAL          :    0.3879s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3718s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0157s for     8192 events => throughput is 5.23E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -415,10 +415,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4306s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2629s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1673s for    90112 events => throughput is 5.39E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    1.4676s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2932s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1740s for    90112 events => throughput is 5.18E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -432,13 +432,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.198253e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.145643e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.334338e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.181518e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -462,9 +462,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4086s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3841s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0240s for     8192 events => throughput is 3.41E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3947s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3700s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0242s for     8192 events => throughput is 3.38E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -497,9 +497,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5232s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2714s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2514s for    90112 events => throughput is 3.58E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.5616s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2951s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2660s for    90112 events => throughput is 3.39E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -514,13 +514,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.375382e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.165146e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.300552e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.207013e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -543,9 +543,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1
  [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7989s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7974s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.20E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.7985s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7970s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.25E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -577,9 +577,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1
  [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6979s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.6904s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0066s for    90112 events => throughput is 1.36E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7427s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.7348s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0071s for    90112 events => throughput is 1.26E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -593,42 +593,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.835154e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.059399e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.144694e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.161680e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.230105e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.190897e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.705062e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.747163e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.235322e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.176819e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.035545e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.038339e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.242431e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.199275e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.754474e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.748317e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
index 5562e4c07e..68ef447e1c 100644
--- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
@@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s
 
 make USEBUILDDIR=1 BACKEND=cuda
 
-
 make USEBUILDDIR=1 BACKEND=cppnone
 
+
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:52:08
+DATE: 2024-08-30_05:07:41
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 1 events (found 1041 events)
- [COUNTERS] PROGRAM TOTAL          :    2.5941s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3442s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.2499s for     8192 events => throughput is 3.64E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.6583s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3435s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.3148s for     8192 events => throughput is 3.54E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6220s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3462s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.2759s for     8192 events => throughput is 3.60E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.6583s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3400s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.3182s for     8192 events => throughput is 3.53E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   26.7017s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8086s
- [COUNTERS] Fortran MEs      ( 1 ) :   24.8931s for    90112 events => throughput is 3.62E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :   27.3144s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8056s
+ [COUNTERS] Fortran MEs      ( 1 ) :   25.5088s for    90112 events => throughput is 3.53E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.7821s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3463s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4305s for     8192 events => throughput is 3.37E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0052s
+ [COUNTERS] PROGRAM TOTAL          :    2.8421s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3410s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4957s for     8192 events => throughput is 3.28E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0053s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   28.5017s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7808s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   26.7158s for    90112 events => throughput is 3.37E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0052s
+ [COUNTERS] PROGRAM TOTAL          :   29.2164s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8016s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   27.4095s for    90112 events => throughput is 3.29E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0053s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.542884e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.417499e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.530103e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.436809e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6103s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3441s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2634s for     8192 events => throughput is 6.48E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0028s
+ [COUNTERS] PROGRAM TOTAL          :    1.6539s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3409s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.3101s for     8192 events => throughput is 6.25E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   15.9197s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7936s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   14.1234s for    90112 events => throughput is 6.38E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0027s
+ [COUNTERS] PROGRAM TOTAL          :   16.3084s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8060s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   14.4993s for    90112 events => throughput is 6.21E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.656588e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.430064e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.664988e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.462632e+03                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,10 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9116s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3446s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5653s for     8192 events => throughput is 1.45E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
+ [COUNTERS] PROGRAM TOTAL          :    0.9243s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3403s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5823s for     8192 events => throughput is 1.41E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0018s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    8.0033s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7755s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    6.2261s for    90112 events => throughput is 1.45E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    8.2383s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8071s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    6.4295s for    90112 events => throughput is 1.40E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.485686e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.441882e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.488153e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.451357e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8483s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3476s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4991s for     8192 events => throughput is 1.64E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8589s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3412s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5161s for     8192 events => throughput is 1.59E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,10 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    7.2914s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7820s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    5.5079s for    90112 events => throughput is 1.64E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
+ [COUNTERS] PROGRAM TOTAL          :    7.4852s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8014s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.6822s for    90112 events => throughput is 1.59E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.693554e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.632945e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.678028e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.644376e+04                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,10 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9859s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3430s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6411s for     8192 events => throughput is 1.28E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0018s
+ [COUNTERS] PROGRAM TOTAL          :    1.0252s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3429s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6803s for     8192 events => throughput is 1.20E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0020s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -479,9 +479,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    8.8930s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7934s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    7.0976s for    90112 events => throughput is 1.27E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    9.2840s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8079s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    7.4740s for    90112 events => throughput is 1.21E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0021s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.269596e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.223468e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.304260e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.221461e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8106s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7739s
+ [COUNTERS] PROGRAM TOTAL          :    0.8083s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7715s
  [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0172s for     8192 events => throughput is 4.76E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0196s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0195s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4031s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.1951s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1884s for    90112 events => throughput is 4.78E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.4407s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.2322s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1889s for    90112 events => throughput is 4.77E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0195s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.836004e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.831207e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.223426e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.210893e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.196129e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.189493e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.417377e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.418574e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.149870e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.138850e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.416796e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.408292e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.156718e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.156050e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.752894e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.757548e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
index e6a1cba79b..161dd39f0e 100644
--- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
@@ -1,10 +1,10 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 
-make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cuda
+make USEBUILDDIR=1 BACKEND=cppnone
 
 
-make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:54:32
+DATE: 2024-08-30_05:10:07
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 1 events (found 1041 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6010s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3425s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.2584s for     8192 events => throughput is 3.63E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.6746s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3402s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.3343s for     8192 events => throughput is 3.51E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6135s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3438s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.2696s for     8192 events => throughput is 3.61E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.6608s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3466s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.3142s for     8192 events => throughput is 3.54E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   26.5878s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7922s
- [COUNTERS] Fortran MEs      ( 1 ) :   24.7956s for    90112 events => throughput is 3.63E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :   27.2560s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.7984s
+ [COUNTERS] Fortran MEs      ( 1 ) :   25.4576s for    90112 events => throughput is 3.54E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896784952157763E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.7487s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3437s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4000s for     8192 events => throughput is 3.41E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0050s
+ [COUNTERS] PROGRAM TOTAL          :    2.8058s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3400s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4606s for     8192 events => throughput is 3.33E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0052s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668138450782073E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   28.1446s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7932s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   26.3466s for    90112 events => throughput is 3.42E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0048s
+ [COUNTERS] PROGRAM TOTAL          :   28.8449s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8046s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   27.0351s for    90112 events => throughput is 3.33E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0052s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.577022e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.458764e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.590866e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.452922e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    1.0076s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3437s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6623s for     8192 events => throughput is 1.24E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
+ [COUNTERS] PROGRAM TOTAL          :    1.0243s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3393s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6833s for     8192 events => throughput is 1.20E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    9.0575s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7825s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    7.2734s for    90112 events => throughput is 1.24E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
+ [COUNTERS] PROGRAM TOTAL          :    9.3335s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8075s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    7.5244s for    90112 events => throughput is 1.20E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.265218e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.227692e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.265996e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.229550e+04                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6296s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3461s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2826s for     8192 events => throughput is 2.90E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6384s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3405s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2969s for     8192 events => throughput is 2.76E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    4.9000s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7718s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    3.1273s for    90112 events => throughput is 2.88E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
+ [COUNTERS] PROGRAM TOTAL          :    5.0435s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8033s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    3.2391s for    90112 events => throughput is 2.78E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.939784e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.846216e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.964350e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.859531e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,10 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6110s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3506s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2595s for     8192 events => throughput is 3.16E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
+ [COUNTERS] PROGRAM TOTAL          :    0.6113s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3429s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2674s for     8192 events => throughput is 3.06E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -401,10 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    4.6623s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7820s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.8794s for    90112 events => throughput is 3.13E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
+ [COUNTERS] PROGRAM TOTAL          :    4.7543s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8053s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.9479s for    90112 events => throughput is 3.06E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.263231e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.159800e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.247254e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.158171e+04                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6684s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3460s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3212s for     8192 events => throughput is 2.55E+04 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6860s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3420s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3427s for     8192 events => throughput is 2.39E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0013s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,10 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    5.3279s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7717s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    3.5549s for    90112 events => throughput is 2.53E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0013s
+ [COUNTERS] PROGRAM TOTAL          :    5.5700s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8019s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    3.7669s for    90112 events => throughput is 2.39E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.589261e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.384494e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.602723e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.358993e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896802503195373E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8100s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7757s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0172s for     8192 events => throughput is 4.77E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0171s
+ [COUNTERS] PROGRAM TOTAL          :    0.8066s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7726s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0171s for     8192 events => throughput is 4.80E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0170s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668190930428073E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3814s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.1945s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1700s for    90112 events => throughput is 5.30E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0169s
+ [COUNTERS] PROGRAM TOTAL          :    2.4214s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.2342s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1702s for    90112 events => throughput is 5.29E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0170s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.860775e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.887616e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.139558e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.141211e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.304686e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.326572e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.344126e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.344235e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.335964e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.327212e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.345203e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.345274e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.314317e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.322182e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.679665e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.678752e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
index 7e343e91b1..21c70ea563 100644
--- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
@@ -1,10 +1,10 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 
 
-make USEBUILDDIR=1 BACKEND=cuda
-
 
+make USEBUILDDIR=1 BACKEND=cuda
 make USEBUILDDIR=1 BACKEND=cppnone
+
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -13,8 +13,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
-
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:56:30
+DATE: 2024-08-30_05:12:08
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 1 events (found 1041 events)
- [COUNTERS] PROGRAM TOTAL          :    2.5870s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3434s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.2435s for     8192 events => throughput is 3.65E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.6771s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3415s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.3355s for     8192 events => throughput is 3.51E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.5935s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3429s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.2507s for     8192 events => throughput is 3.64E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :    2.6590s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3414s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.3176s for     8192 events => throughput is 3.53E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   26.4482s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7926s
- [COUNTERS] Fortran MEs      ( 1 ) :   24.6556s for    90112 events => throughput is 3.65E+03 events/s
+ [COUNTERS] PROGRAM TOTAL          :   27.2570s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8020s
+ [COUNTERS] Fortran MEs      ( 1 ) :   25.4551s for    90112 events => throughput is 3.54E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.7899s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3466s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4385s for     8192 events => throughput is 3.36E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0049s
+ [COUNTERS] PROGRAM TOTAL          :    2.9720s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3489s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.6176s for     8192 events => throughput is 3.13E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0054s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   28.6799s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7926s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   26.8820s for    90112 events => throughput is 3.35E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0052s
+ [COUNTERS] PROGRAM TOTAL          :   29.4504s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8086s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   27.6363s for    90112 events => throughput is 3.26E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0054s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.507267e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.407468e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.511786e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.398569e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5883s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3421s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2436s for     8192 events => throughput is 6.59E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0027s
+ [COUNTERS] PROGRAM TOTAL          :    1.6359s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3461s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2869s for     8192 events => throughput is 6.37E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   15.4498s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7701s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   13.6770s for    90112 events => throughput is 6.59E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0028s
+ [COUNTERS] PROGRAM TOTAL          :   16.2099s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8349s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   14.3720s for    90112 events => throughput is 6.27E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.943689e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.609107e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.925887e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.653433e+03                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,10 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9098s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3504s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5576s for     8192 events => throughput is 1.47E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0018s
+ [COUNTERS] PROGRAM TOTAL          :    0.9279s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3431s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5830s for     8192 events => throughput is 1.41E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    7.9207s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7702s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    6.1490s for    90112 events => throughput is 1.47E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
+ [COUNTERS] PROGRAM TOTAL          :    8.3102s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8480s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    6.4604s for    90112 events => throughput is 1.39E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0018s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.518105e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.427331e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.514088e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.434667e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,10 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8334s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3445s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4873s for     8192 events => throughput is 1.68E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
+ [COUNTERS] PROGRAM TOTAL          :    0.8698s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3480s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5202s for     8192 events => throughput is 1.57E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -401,10 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    7.1725s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7642s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    5.4067s for    90112 events => throughput is 1.67E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
+ [COUNTERS] PROGRAM TOTAL          :    7.5428s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8367s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.7043s for    90112 events => throughput is 1.58E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.710218e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.635041e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.722202e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.637970e+04                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,10 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9928s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3430s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6479s for     8192 events => throughput is 1.26E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0019s
+ [COUNTERS] PROGRAM TOTAL          :    1.0618s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3493s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.7103s for     8192 events => throughput is 1.15E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0022s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -479,10 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    9.0659s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7892s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    7.2749s for    90112 events => throughput is 1.24E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0018s
+ [COUNTERS] PROGRAM TOTAL          :    9.5434s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8253s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    7.7159s for    90112 events => throughput is 1.17E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0021s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.210214e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.208934e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.254889e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.204603e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8127s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7760s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0173s for     8192 events => throughput is 4.75E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0195s
+ [COUNTERS] PROGRAM TOTAL          :    0.8109s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7740s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0173s for     8192 events => throughput is 4.74E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0196s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4045s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.1952s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1896s for    90112 events => throughput is 4.75E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0197s
+ [COUNTERS] PROGRAM TOTAL          :    2.4522s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.2422s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1904s for    90112 events => throughput is 4.73E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0196s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.814747e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.781444e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.187533e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.182913e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.164029e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.157874e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.389995e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.386488e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.128645e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.107983e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.372948e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.377808e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.119403e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.110849e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.750060e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.750476e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
index 0fe0851e40..d53f8d5c95 100644
--- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 
-make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
-
 make USEBUILDDIR=1 BACKEND=cppsse4
+
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:50:54
+DATE: 2024-08-30_05:06:26
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
  [UNWEIGHT] Wrote 1767 events (found 4306 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6580s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6494s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0085s for     8192 events => throughput is 9.58E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6644s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6558s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0087s for     8192 events => throughput is 9.43E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3938s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3851s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0086s for     8192 events => throughput is 9.50E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3909s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3823s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0087s for     8192 events => throughput is 9.46E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4272s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3345s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0927s for    90112 events => throughput is 9.72E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4368s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3432s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0936s for    90112 events => throughput is 9.63E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3960s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3874s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0082s for     8192 events => throughput is 9.99E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3913s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3824s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0084s for     8192 events => throughput is 9.70E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4271s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3353s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0914s for    90112 events => throughput is 9.86E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4316s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3380s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0932s for    90112 events => throughput is 9.67E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.006217e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.991231e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.022578e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.007459e+06                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3903s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3856s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0043s for     8192 events => throughput is 1.89E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3890s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3840s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0046s for     8192 events => throughput is 1.79E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3937s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3444s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0489s for    90112 events => throughput is 1.84E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3910s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3397s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0509s for    90112 events => throughput is 1.77E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.897485e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.890551e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.985824e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.963594e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3921s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3888s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.88E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3870s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3837s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.93E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3531s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3221s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0306s for    90112 events => throughput is 2.95E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3706s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3379s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0322s for    90112 events => throughput is 2.79E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.126014e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.034126e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.364824e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.221092e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3883s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3854s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0026s for     8192 events => throughput is 3.20E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3880s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3848s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.93E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3635s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3336s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0295s for    90112 events => throughput is 3.05E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3742s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3423s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0315s for    90112 events => throughput is 2.86E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.285096e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.240649e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.423598e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.467487e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3910s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3874s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.63E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3854s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3817s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0032s for     8192 events => throughput is 2.59E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3563s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3235s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0323s for    90112 events => throughput is 2.79E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3744s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3392s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0347s for    90112 events => throughput is 2.60E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.866364e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.788936e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.134151e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.074319e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869280] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8164s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8152s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.37E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8156s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8144s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.39E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098556244384401] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7576s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7518s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0052s for    90112 events => throughput is 1.72E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7702s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.7641s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0055s for    90112 events => throughput is 1.65E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.730366e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.977913e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.967481e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.013766e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.198830e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.090365e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.649618e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.620997e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.170218e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.121235e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.903772e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.991647e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.201664e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.084242e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.319844e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.324810e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
index 5c4b04cd13..4417c97e0c 100644
--- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
@@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s
 
 make USEBUILDDIR=1 BACKEND=cuda
 
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:51:19
+DATE: 2024-08-30_05:06:51
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
  [UNWEIGHT] Wrote 1767 events (found 4306 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6497s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6414s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0083s for     8192 events => throughput is 9.86E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6549s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6462s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0087s for     8192 events => throughput is 9.43E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4039s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3951s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0089s for     8192 events => throughput is 9.25E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3959s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3870s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0088s for     8192 events => throughput is 9.30E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4878s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3911s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0967s for    90112 events => throughput is 9.32E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4361s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3410s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0951s for    90112 events => throughput is 9.47E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156021439979276] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3975s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3887s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0084s for     8192 events => throughput is 9.70E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3909s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3824s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0082s for     8192 events => throughput is 9.98E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098550550786874] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4264s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3345s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0916s for    90112 events => throughput is 9.84E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4299s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3366s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0930s for    90112 events => throughput is 9.69E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.034265e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.408795e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.024334e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.021260e+06                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156021343761686] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3905s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3875s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0026s for     8192 events => throughput is 3.09E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3872s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3842s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.01E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098550488814170] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3711s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3420s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0289s for    90112 events => throughput is 3.12E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3638s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3333s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0302s for    90112 events => throughput is 2.98E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.288372e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.285260e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.432097e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.345950e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3889s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3868s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0018s for     8192 events => throughput is 4.52E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3861s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3838s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0019s for     8192 events => throughput is 4.22E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3432s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3229s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0200s for    90112 events => throughput is 4.50E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3595s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3386s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0206s for    90112 events => throughput is 4.38E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.077269e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.963399e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.403997e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.167457e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3869s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3848s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0018s for     8192 events => throughput is 4.55E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3858s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3837s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0018s for     8192 events => throughput is 4.45E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3387s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3197s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0187s for    90112 events => throughput is 4.81E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3645s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3439s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0203s for    90112 events => throughput is 4.44E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.322495e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.054609e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.427973e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.604962e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,8 +445,8 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156021917867366] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3878s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3853s
+ [COUNTERS] PROGRAM TOTAL          :    0.3906s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3880s
  [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0022s for     8192 events => throughput is 3.78E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
@@ -479,10 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098551029624061] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3406s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3185s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0218s for    90112 events => throughput is 4.14E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    1.3833s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3593s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0236s for    90112 events => throughput is 3.82E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.424607e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.396678e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.888963e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.568316e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156022290359153] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8169s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8154s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.46E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8179s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8164s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.47E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098551341908548] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7464s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7407s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0049s for    90112 events => throughput is 1.85E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7841s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.7780s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0052s for    90112 events => throughput is 1.74E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.032627e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.109830e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.278657e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.241375e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.543019e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.618711e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.578539e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.567861e+09                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.555176e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.588931e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.658200e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.665083e+09                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.883073e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.844966e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.705532e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.699487e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
index 62624c2c92..daf5b737bc 100644
--- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 
+make USEBUILDDIR=1 BACKEND=cppsse4
 
-make USEBUILDDIR=1 BACKEND=cuda
 make USEBUILDDIR=1 BACKEND=cppnone
-make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:51:44
+DATE: 2024-08-30_05:07:15
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
  [UNWEIGHT] Wrote 1767 events (found 4306 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6493s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6409s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0084s for     8192 events => throughput is 9.81E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.6646s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6556s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0090s for     8192 events => throughput is 9.06E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3992s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3909s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0083s for     8192 events => throughput is 9.85E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3932s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3846s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0086s for     8192 events => throughput is 9.53E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4133s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3208s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0925s for    90112 events => throughput is 9.75E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4482s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3542s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0940s for    90112 events => throughput is 9.59E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3950s
+ [COUNTERS] PROGRAM TOTAL          :    0.3955s
  [COUNTERS] Fortran Overhead ( 0 ) :    0.3864s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0082s for     8192 events => throughput is 9.94E+05 events/s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0086s for     8192 events => throughput is 9.48E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4087s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3177s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0906s for    90112 events => throughput is 9.95E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4328s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3379s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0944s for    90112 events => throughput is 9.54E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.803386e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.887098e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.910254e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.512638e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3923s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3874s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0045s for     8192 events => throughput is 1.82E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3896s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3847s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0045s for     8192 events => throughput is 1.80E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3653s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3175s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0474s for    90112 events => throughput is 1.90E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3924s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3423s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0497s for    90112 events => throughput is 1.81E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.964224e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.970137e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.028853e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.025977e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3954s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3923s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.03E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3875s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3843s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.96E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3415s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3131s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0280s for    90112 events => throughput is 3.22E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3703s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3390s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0309s for    90112 events => throughput is 2.92E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.237365e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.209026e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.416021e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.431270e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,8 +367,8 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3940s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3909s
+ [COUNTERS] PROGRAM TOTAL          :    0.3876s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3845s
  [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.04E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
@@ -401,9 +401,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3467s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3184s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0279s for    90112 events => throughput is 3.23E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3745s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3437s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0304s for    90112 events => throughput is 2.96E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.347126e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.278252e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.589308e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.611379e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3978s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3942s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.66E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3891s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3855s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.64E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,9 +479,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3501s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3186s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0311s for    90112 events => throughput is 2.90E+06 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3783s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.3437s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0341s for    90112 events => throughput is 2.64E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.904623e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.908043e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.114835e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.094537e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.3116 [0.31156027194560187] fbridge_mode=1
  [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8152s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8140s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.39E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8165s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8154s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.40E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 3
  [XSECTION] Cross section = 0.311 [0.31098556243340819] fbridge_mode=1
  [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7501s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7444s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0051s for    90112 events => throughput is 1.75E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7739s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.7677s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0056s for    90112 events => throughput is 1.61E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.842332e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.052309e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.019027e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.981616e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.214756e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.109375e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.517612e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.503882e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.171297e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.117951e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.740991e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.883236e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.214875e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.131877e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.310258e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.291537e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
index 6131633fdd..c4c5ee1ec5 100644
--- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
@@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s
 
 
 make USEBUILDDIR=1 BACKEND=cuda
-
 make USEBUILDDIR=1 BACKEND=cppnone
-make USEBUILDDIR=1 BACKEND=cppsse4
 
+
+make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:49:37
+DATE: 2024-08-30_05:05:08
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
  [UNWEIGHT] Wrote 2620 events (found 5403 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8016s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7599s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0418s for     8192 events => throughput is 1.96E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8194s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7770s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0423s for     8192 events => throughput is 1.94E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4173s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3758s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0415s for     8192 events => throughput is 1.97E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4148s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3722s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0426s for     8192 events => throughput is 1.92E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6984s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2478s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4506s for    90112 events => throughput is 2.00E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7361s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2691s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4670s for    90112 events => throughput is 1.93E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419863] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4145s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3702s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0438s for     8192 events => throughput is 1.87E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    0.4179s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3727s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0447s for     8192 events => throughput is 1.83E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7366s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2536s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4825s for    90112 events => throughput is 1.87E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7703s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2738s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4960s for    90112 events => throughput is 1.82E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.880754e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.858086e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.882930e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.858737e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3960s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3713s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0243s for     8192 events => throughput is 3.37E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3976s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3721s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0251s for     8192 events => throughput is 3.26E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5199s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2483s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2711s for    90112 events => throughput is 3.32E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.5481s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2696s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2781s for    90112 events => throughput is 3.24E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.302363e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.271451e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.365112e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.291022e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3924s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3765s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0155s for     8192 events => throughput is 5.28E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3883s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3723s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0156s for     8192 events => throughput is 5.25E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4183s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2503s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1675s for    90112 events => throughput is 5.38E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4494s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2754s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1736s for    90112 events => throughput is 5.19E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.278183e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.204785e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.374748e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.255139e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,10 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3894s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3754s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0136s for     8192 events => throughput is 6.02E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    0.3930s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3775s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0150s for     8192 events => throughput is 5.47E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -401,9 +401,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3978s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2454s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1520s for    90112 events => throughput is 5.93E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4285s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2693s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1588s for    90112 events => throughput is 5.67E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.775498e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.679020e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.841522e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.812224e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4047s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3821s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0222s for     8192 events => throughput is 3.70E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3962s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3733s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0224s for     8192 events => throughput is 3.66E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,10 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4927s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2545s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2377s for    90112 events => throughput is 3.79E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    1.5262s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2749s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2508s for    90112 events => throughput is 3.59E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.798876e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.564809e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.612840e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.622774e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8126s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8111s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.24E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8045s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8031s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.25E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6862s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.6788s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0066s for    90112 events => throughput is 1.37E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7059s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.6982s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0068s for    90112 events => throughput is 1.32E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.869432e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.039338e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.714086e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.623593e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.311155e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.266679e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.083882e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.078171e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.322734e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.260477e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.159310e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.153595e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.296675e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.275336e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.098537e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.083603e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
index 58b86df658..8dec5eb758 100644
--- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
@@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s
 
 make USEBUILDDIR=1 BACKEND=cuda
 
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
 
-
 make USEBUILDDIR=1 BACKEND=cppavx2
+
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:50:03
+DATE: 2024-08-30_05:05:34
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
  [UNWEIGHT] Wrote 2620 events (found 5403 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8051s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7635s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0416s for     8192 events => throughput is 1.97E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8122s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7694s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0428s for     8192 events => throughput is 1.91E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -84,8 +84,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
  [COUNTERS] PROGRAM TOTAL          :    0.4148s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3740s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0408s for     8192 events => throughput is 2.01E+05 events/s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3721s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0427s for     8192 events => throughput is 1.92E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7188s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2615s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4573s for    90112 events => throughput is 1.97E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7463s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2808s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4655s for    90112 events => throughput is 1.94E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598853620719339] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4164s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3751s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0410s for     8192 events => throughput is 2.00E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    0.4160s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3732s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0424s for     8192 events => throughput is 1.93E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577522280119403] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7041s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2499s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4538s for    90112 events => throughput is 1.99E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7344s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2681s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4659s for    90112 events => throughput is 1.93E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.004528e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.951237e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.989674e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.969815e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598849697851406] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3933s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3758s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0172s for     8192 events => throughput is 4.76E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3892s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3716s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0172s for     8192 events => throughput is 4.75E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577518590213366] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4571s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2702s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1866s for    90112 events => throughput is 4.83E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4598s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2700s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1895s for    90112 events => throughput is 4.76E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.766493e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.698064e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.711541e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.736294e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3932s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3838s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0091s for     8192 events => throughput is 8.99E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3829s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3733s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0093s for     8192 events => throughput is 8.82E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3456s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2495s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0958s for    90112 events => throughput is 9.40E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3681s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2678s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0999s for    90112 events => throughput is 9.02E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.204759e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.141420e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.210555e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.161888e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3855s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3769s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0083s for     8192 events => throughput is 9.85E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3833s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3744s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0087s for     8192 events => throughput is 9.46E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3394s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2483s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0908s for    90112 events => throughput is 9.92E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.3696s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2744s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0950s for    90112 events => throughput is 9.49E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.706656e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.541329e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.233766e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.771909e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598854350242270] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3868s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3748s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0116s for     8192 events => throughput is 7.03E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3864s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3739s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0121s for     8192 events => throughput is 6.77E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,10 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577522751628507] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3825s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2565s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1256s for    90112 events => throughput is 7.17E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [COUNTERS] PROGRAM TOTAL          :    1.4212s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2851s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1357s for    90112 events => throughput is 6.64E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.942843e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.751639e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.910825e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.922683e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598870301426373] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8091s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8078s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.43E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8028s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8015s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.44E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577527268256027] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7098s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7033s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0058s for    90112 events => throughput is 1.56E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7031s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.6965s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0059s for    90112 events => throughput is 1.52E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.705094e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.981782e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.269887e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.227555e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.888199e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.915612e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.391800e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.420308e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.898622e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.890432e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.539526e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.551435e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.473018e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.474710e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.495430e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.460630e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
index 75d0c77429..426277ef12 100644
--- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
@@ -1,27 +1,27 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 
-make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
-make USEBUILDDIR=1 BACKEND=cppsse4
-
 
+make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
+
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
-make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-09_00:50:28
+DATE: 2024-08-30_05:06:00
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
  [UNWEIGHT] Wrote 2620 events (found 5403 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8208s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7796s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0412s for     8192 events => throughput is 1.99E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8205s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7782s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0423s for     8192 events => throughput is 1.94E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4160s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3749s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0411s for     8192 events => throughput is 1.99E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4194s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3765s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0429s for     8192 events => throughput is 1.91E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7104s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2559s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4544s for    90112 events => throughput is 1.98E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7506s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2806s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4701s for    90112 events => throughput is 1.92E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4204s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3749s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0450s for     8192 events => throughput is 1.82E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4273s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3798s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0470s for     8192 events => throughput is 1.74E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -167,10 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577525144126803] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7448s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2577s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4867s for    90112 events => throughput is 1.85E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    1.7703s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2698s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5000s for    90112 events => throughput is 1.80E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.873127e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.827224e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.907422e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.858385e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,9 +211,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3960s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3712s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0244s for     8192 events => throughput is 3.36E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.4001s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3740s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0256s for     8192 events => throughput is 3.20E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -245,9 +245,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577525144126810] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5269s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2579s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2686s for    90112 events => throughput is 3.35E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.5448s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2672s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2772s for    90112 events => throughput is 3.25E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.333942e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.299374e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.376975e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.333877e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,9 +289,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3926s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3769s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0153s for     8192 events => throughput is 5.37E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3941s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3783s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0154s for     8192 events => throughput is 5.30E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -323,9 +323,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4173s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2508s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1662s for    90112 events => throughput is 5.42E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4525s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2782s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1739s for    90112 events => throughput is 5.18E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.335642e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.280897e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.330908e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.361948e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,9 +367,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3897s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3750s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0143s for     8192 events => throughput is 5.74E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3861s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3714s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0142s for     8192 events => throughput is 5.77E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -401,9 +401,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4068s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2528s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1536s for    90112 events => throughput is 5.87E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.4270s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2702s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1563s for    90112 events => throughput is 5.77E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.855366e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.837527e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.947430e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.870753e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,9 +445,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3995s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3772s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0219s for     8192 events => throughput is 3.75E+05 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.3960s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3736s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0219s for     8192 events => throughput is 3.74E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -479,10 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4943s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2580s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2358s for    90112 events => throughput is 3.82E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] PROGRAM TOTAL          :    1.5174s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.2715s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2453s for    90112 events => throughput is 3.67E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.733262e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.623653e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.702855e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.691171e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,9 +523,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.6 [44.598860056955807] fbridge_mode=1
  [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8053s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8039s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.21E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    0.8038s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8024s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.30E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
@@ -557,9 +557,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 44.58 [44.577523872560512] fbridge_mode=1
  [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6927s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.6853s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0066s for    90112 events => throughput is 1.36E+07 events/s
+ [COUNTERS] PROGRAM TOTAL          :    1.7081s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.7005s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0068s for    90112 events => throughput is 1.33E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
@@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.871837e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.992048e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.622666e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.630705e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.299743e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.263161e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.055606e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.062051e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.302003e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.277099e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.140289e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.139998e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.319830e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.271500e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.983678e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.946602e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 

From 7e930eb1a06d78c6035b55c185dd5d7194c5ae62 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 30 Aug 2024 08:05:31 +0200
Subject: [PATCH 36/50] [helas] in tmad/madX.sh, print the DATE also at the end
 of the test

---
 epochX/cudacpp/tmad/madX.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/epochX/cudacpp/tmad/madX.sh b/epochX/cudacpp/tmad/madX.sh
index 83158f2c5b..663a375922 100755
--- a/epochX/cudacpp/tmad/madX.sh
+++ b/epochX/cudacpp/tmad/madX.sh
@@ -704,3 +704,4 @@ else
 fi
 
 printf "\nTEST COMPLETED\n"
+printf "\nDATE: $(date '+%Y-%m-%d_%H:%M:%S')\n\n"

From f25cd7ab7eafd1acf5a40f2d5ce958bc233685f7 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 30 Aug 2024 09:53:34 +0200
Subject: [PATCH 37/50] [helas] rerun tmad ggttggg inlL

./tmad/teeMadX.sh -ggttggg +10x -makeclean -inlLonly
STARTED AT Fri Aug 30 08:08:13 AM CEST 2024
ENDED   AT Fri Aug 30 09:40:38 AM CEST 2024

Note: both CUDA and C++ are 5-15% slower in HELINL=L than in HELINL=0
For CUDA this can be seen both in the madevent test and in the check.exe test

diff -u --color tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt

(C++ madevent test, 15% slower)
-Executing ' ./build.512y_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
@@ -401,10 +401,10 @@
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  325.4847s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5005s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  320.9382s for    90112 events => throughput is 2.81E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0460s
+ [COUNTERS] PROGRAM TOTAL          :  286.1989s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4892s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  281.6678s for    90112 events => throughput is 3.20E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0420s

(CUDA madevent test, 10% slower)
-Executing ' ./build.cuda_d_inlL_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
@@ -557,10 +557,10 @@
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :   19.6828s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.9752s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   13.4712s for    90112 events => throughput is 6.69E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    1.2365s
+ [COUNTERS] PROGRAM TOTAL          :   17.9918s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.9757s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   11.9277s for    90112 events => throughput is 7.55E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0883s

(CUDA check test with large grid, 5% slower)
 *** EXECUTE GCHECK(MAX) -p 512 32 1 ***
-Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
+Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.102842e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.584992e+03                 )  sec^-1
---
 .../log_ggttggg_mad_d_inlL_hrd0.txt           | 159 +++++++++---------
 1 file changed, 81 insertions(+), 78 deletions(-)

diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
index d2204615a7..1895ebe1d5 100644
--- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inlL_hrd0.txt
@@ -1,10 +1,10 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 
-make USEBUILDDIR=1 BACKEND=cuda
 
 
-make USEBUILDDIR=1 BACKEND=cppnone
+make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-28_22:56:29
+DATE: 2024-08-30_08:09:07
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
@@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
  [UNWEIGHT] Wrote 1 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  104.2587s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5124s
- [COUNTERS] Fortran MEs      ( 1 ) :  103.7463s for     8192 events => throughput is 7.90E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          :  103.8135s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5091s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.3044s for     8192 events => throughput is 7.93E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  104.3674s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5136s
- [COUNTERS] Fortran MEs      ( 1 ) :  103.8538s for     8192 events => throughput is 7.89E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          :  103.7790s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5101s
+ [COUNTERS] Fortran MEs      ( 1 ) :  103.2690s for     8192 events => throughput is 7.93E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1144.4747s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4497s
- [COUNTERS] Fortran MEs      ( 1 ) : 1140.0250s for    90112 events => throughput is 7.90E+01 events/s
+ [COUNTERS] PROGRAM TOTAL          : 1140.3185s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4393s
+ [COUNTERS] Fortran MEs      ( 1 ) : 1135.8792s for    90112 events => throughput is 7.93E+01 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -133,10 +133,10 @@ Executing ' ./build.none_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  143.1637s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5170s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  142.4135s for     8192 events => throughput is 5.75E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2331s
+ [COUNTERS] PROGRAM TOTAL          :  139.9940s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5183s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  139.2573s for     8192 events => throughput is 5.88E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2184s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -167,10 +167,10 @@ Executing ' ./build.none_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1584.8411s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4979s
- [COUNTERS] CudaCpp MEs      ( 2 ) : 1580.1217s for    90112 events => throughput is 5.70E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2215s
+ [COUNTERS] PROGRAM TOTAL          : 1544.9857s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4945s
+ [COUNTERS] CudaCpp MEs      ( 2 ) : 1540.2681s for    90112 events => throughput is 5.85E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2232s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.064548e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.268132e+01                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.012515e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.969834e+01                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -211,10 +211,10 @@ Executing ' ./build.sse4_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   67.8957s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5203s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   67.2615s for     8192 events => throughput is 1.22E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1138s
+ [COUNTERS] PROGRAM TOTAL          :   68.7901s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5160s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   68.1630s for     8192 events => throughput is 1.20E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1111s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -245,10 +245,10 @@ Executing ' ./build.sse4_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  744.9981s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5014s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  740.3867s for    90112 events => throughput is 1.22E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1100s
+ [COUNTERS] PROGRAM TOTAL          :  746.7584s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4964s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  742.1503s for    90112 events => throughput is 1.21E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1117s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.465109e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.455329e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.456991e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.456504e+02                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -289,10 +289,10 @@ Executing ' ./build.avx2_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   32.8435s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5217s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   32.2689s for     8192 events => throughput is 2.54E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0530s
+ [COUNTERS] PROGRAM TOTAL          :   32.1059s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5201s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   31.5337s for     8192 events => throughput is 2.60E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0521s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -323,10 +323,10 @@ Executing ' ./build.avx2_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  359.2894s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5142s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  354.7232s for    90112 events => throughput is 2.54E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0520s
+ [COUNTERS] PROGRAM TOTAL          :  350.3881s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4918s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  345.8443s for    90112 events => throughput is 2.61E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0519s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.096824e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.120269e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.124137e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.135721e+02                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -367,10 +367,10 @@ Executing ' ./build.512y_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   29.2042s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5176s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   28.6402s for     8192 events => throughput is 2.86E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0464s
+ [COUNTERS] PROGRAM TOTAL          :   29.5960s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5188s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   29.0309s for     8192 events => throughput is 2.82E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0463s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -401,10 +401,10 @@ Executing ' ./build.512y_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  320.6913s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5138s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  316.1312s for    90112 events => throughput is 2.85E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0463s
+ [COUNTERS] PROGRAM TOTAL          :  325.4847s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5005s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  320.9382s for    90112 events => throughput is 2.81E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0460s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.507403e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.521187e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.509785e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.530730e+02                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -445,10 +445,10 @@ Executing ' ./build.512z_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   28.3605s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5210s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   27.7898s for     8192 events => throughput is 2.95E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0497s
+ [COUNTERS] PROGRAM TOTAL          :   28.6781s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5183s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   28.1085s for     8192 events => throughput is 2.91E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0512s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -479,10 +479,10 @@ Executing ' ./build.512z_d_inlL_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  313.4261s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5141s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  308.8625s for    90112 events => throughput is 2.92E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0494s
+ [COUNTERS] PROGRAM TOTAL          :  313.0290s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5066s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  308.4727s for    90112 events => throughput is 2.92E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0497s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.237768e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.266545e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.237055e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.249275e+02                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -523,10 +523,10 @@ Executing ' ./build.cuda_d_inlL_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
  [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :    3.4515s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.9832s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2329s for     8192 events => throughput is 6.64E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    1.2354s
+ [COUNTERS] PROGRAM TOTAL          :    3.4526s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.9837s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2318s for     8192 events => throughput is 6.65E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.2371s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -557,10 +557,10 @@ Executing ' ./build.cuda_d_inlL_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg
  [XSECTION] ChannelId = 1
  [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1
  [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :   19.6663s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.9649s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   13.4667s for    90112 events => throughput is 6.69E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    1.2347s
+ [COUNTERS] PROGRAM TOTAL          :   19.6828s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.9752s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   13.4712s for    90112 events => throughput is 6.69E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.2365s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
@@ -573,43 +573,46 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.697997e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.696004e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.001430e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.049873e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.490932e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.488277e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.099504e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.102842e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.480979e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.501193e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.864809e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.865953e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.495272e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.471918e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=L] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.066155e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.066284e+03                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
 TEST COMPLETED
+
+DATE: 2024-08-30_09:40:38
+

From fb0d91a4849b53048de6ed611e995a5c05c245af Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Mon, 2 Sep 2024 18:21:13 +0200
Subject: [PATCH 38/50] [helas] move to CODEGEN logs from the latest
 upstream/master for easier merging

git checkout upstream/master $(git ls-tree --name-only upstream/master */CODEGEN*txt)
---
 .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt   |  40 +-
 .../CODEGEN_cudacpp_ee_mumu_log.txt           |  22 +-
 .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt       |  38 +-
 .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt    |  24 +-
 .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt |  61 ++-
 .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt     |  42 +-
 .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt  |  22 +-
 .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt   |  40 +-
 .../CODEGEN_cudacpp_gg_ttgg_log.txt           |  28 +-
 .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt |  44 +--
 .../CODEGEN_cudacpp_gg_ttggg_log.txt          |  24 +-
 .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt     |  61 ++-
 .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt  |  34 +-
 .../CODEGEN_mad_heft_gg_bb_log.txt            |  36 +-
 .../CODEGEN_cudacpp_heft_gg_bb_log.txt        |  20 +-
 .../CODEGEN_mad_pp_tt012j_log.txt             | 371 +++++++++---------
 .../CODEGEN_mad_smeft_gg_tttt_log.txt         |  42 +-
 .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt     |  26 +-
 .../CODEGEN_mad_susy_gg_t1t1_log.txt          |  38 +-
 .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt      |  22 +-
 .../CODEGEN_mad_susy_gg_tt_log.txt            |  40 +-
 .../CODEGEN_cudacpp_susy_gg_tt_log.txt        |  20 +-
 22 files changed, 504 insertions(+), 591 deletions(-)

diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
index 88af428730..e151ce070c 100644
--- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
@@ -62,7 +62,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005793333053588867 [0m
+[1;32mDEBUG: model prefixing  takes 0.00567936897277832 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -176,8 +176,7 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Processing color information for process: e+ e- > mu+ mu- @1 
 INFO: Creating files in directory P1_epem_mupmum 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fddd6802550> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -186,30 +185,30 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 [1;34mWARNING: vector code for lepton pdf not implemented. We removed the option to run dressed lepton [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group epem_mupmum 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1545][0m [0m
 Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
-Wrote files for 8 helas calls in 0.118 s
+Wrote files for 8 helas calls in 0.107 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
-ALOHA: aloha creates 3 routines in  0.207 s
+ALOHA: aloha creates 3 routines in  0.202 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 7 routines in  0.262 s
+ALOHA: aloha creates 7 routines in  0.258 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -220,8 +219,6 @@ ALOHA: aloha creates 7 routines in  0.262 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h
@@ -237,7 +234,6 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
@@ -245,17 +241,17 @@ Hunk #1 succeeded at 496 (offset 12 lines).
 patching file driver.f
 patching file matrix1.f
 Hunk #2 succeeded at 229 (offset 9 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.143s
-user	0m1.874s
-sys	0m0.265s
-Code generation completed in 2 seconds
+real	0m2.783s
+user	0m1.820s
+sys	0m0.261s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
index 5ca2772f5f..a77157e74f 100644
--- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
@@ -62,7 +62,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005778312683105469 [0m
+[1;32mDEBUG: model prefixing  takes 0.005564689636230469 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -168,11 +168,11 @@ INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Processing color information for process: e+ e- > mu+ mu- @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc
@@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 4 routines in  0.278 s
+ALOHA: aloha creates 4 routines in  0.266 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -194,8 +194,6 @@ ALOHA: aloha creates 4 routines in  0.278 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h
@@ -204,7 +202,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
 quit
 
-real	0m0.686s
-user	0m0.625s
-sys	0m0.055s
+real	0m0.666s
+user	0m0.592s
+sys	0m0.058s
 Code generation completed in 0 seconds
diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
index f321315aec..3c9665b806 100644
--- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005693197250366211 [0m
+[1;32mDEBUG: model prefixing  takes 0.005519866943359375 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -177,8 +177,7 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fcbba8dde80> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -187,34 +186,32 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1545][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.120 s
+Wrote files for 10 helas calls in 0.109 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.152 s
+ALOHA: aloha creates 2 routines in  0.149 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.138 s
+ALOHA: aloha creates 4 routines in  0.134 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h
@@ -230,22 +227,21 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m1.975s
-user	0m1.699s
-sys	0m0.276s
+real	0m2.133s
+user	0m1.651s
+sys	0m0.279s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
index 36e8aed83f..8c72501b9d 100644
--- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005682468414306641 [0m
+[1;32mDEBUG: model prefixing  takes 0.005546092987060547 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.009 s
+1 processes with 3 diagrams generated in 0.008 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -169,11 +169,11 @@ INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc
@@ -182,15 +182,13 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.149 s
+ALOHA: aloha creates 2 routines in  0.145 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h
@@ -199,7 +197,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
 quit
 
-real	0m0.559s
-user	0m0.504s
-sys	0m0.051s
+real	0m0.630s
+user	0m0.483s
+sys	0m0.052s
 Code generation completed in 0 seconds
diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
index ee9cfb416f..85e9008ce9 100644
--- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
+++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005636453628540039 [0m
+[1;32mDEBUG: model prefixing  takes 0.005473613739013672 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.008 s
+1 processes with 3 diagrams generated in 0.009 s
 Total: 1 processes with 3 diagrams
 add process g g > t t~ g
 INFO: Checking for minimal orders which gives processes. 
@@ -187,8 +187,7 @@ INFO: Processing color information for process: g g > t t~ g @2
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P2_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fa48afd9970> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -197,19 +196,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fa48afd9ac0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -218,32 +216,32 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 1 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1626][0m [0m
-Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s
-Wrote files for 46 helas calls in 0.280 s
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1545][0m [0m
+Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s
+Wrote files for 46 helas calls in 0.259 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.338 s
+ALOHA: aloha creates 5 routines in  0.330 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.768 s
+ALOHA: aloha creates 10 routines in  0.326 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -255,8 +253,6 @@ ALOHA: aloha creates 10 routines in  0.768 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h
@@ -272,7 +268,6 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
@@ -283,16 +278,16 @@ patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
 Hunk #2 succeeded at 236 (offset 16 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.190s
-user	0m2.460s
-sys	0m0.288s
+real	0m3.083s
+user	0m2.525s
+sys	0m0.334s
 Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
index 401f85e77f..e3198e397b 100644
--- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005800962448120117 [0m
+[1;32mDEBUG: model prefixing  takes 0.005499839782714844 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.023 s
+1 processes with 16 diagrams generated in 0.022 s
 Total: 1 processes with 16 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -177,8 +177,7 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Processing color information for process: g g > t t~ g @1 
 INFO: Creating files in directory P1_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f4503b43b50> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -187,25 +186,25 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1626][0m [0m
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s
-Wrote files for 36 helas calls in 0.169 s
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1545][0m [0m
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s
+Wrote files for 36 helas calls in 0.157 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.338 s
+ALOHA: aloha creates 5 routines in  0.330 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
@@ -224,8 +223,6 @@ ALOHA: aloha creates 10 routines in  0.323 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h
@@ -241,24 +238,23 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
 Hunk #2 succeeded at 236 (offset 16 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.547s
-user	0m2.261s
-sys	0m0.286s
-Code generation completed in 2 seconds
+real	0m2.487s
+user	0m2.208s
+sys	0m0.278s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
index bb9ce0f548..ed6a4e0f7b 100644
--- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0057353973388671875 [0m
+[1;32mDEBUG: model prefixing  takes 0.005787849426269531 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -169,11 +169,11 @@ INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Processing color information for process: g g > t t~ g @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc
@@ -185,7 +185,7 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.340 s
+ALOHA: aloha creates 5 routines in  0.334 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -197,8 +197,6 @@ ALOHA: aloha creates 5 routines in  0.340 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h
@@ -207,7 +205,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
 quit
 
-real	0m0.812s
-user	0m0.751s
-sys	0m0.056s
+real	0m0.806s
+user	0m0.744s
+sys	0m0.057s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
index 040b2ee799..c8632ccb1b 100644
--- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005586862564086914 [0m
+[1;32mDEBUG: model prefixing  takes 0.005680084228515625 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.162 s
+1 processes with 123 diagrams generated in 0.167 s
 Total: 1 processes with 123 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -177,8 +177,7 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ g g @1 
 INFO: Creating files in directory P1_gg_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fd3cad1ee50> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -187,18 +186,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1626][0m [0m
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s
-Wrote files for 222 helas calls in 0.731 s
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1545][0m [0m
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.444 s
+Wrote files for 222 helas calls in 0.751 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
@@ -212,7 +211,7 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.323 s
+ALOHA: aloha creates 10 routines in  0.316 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -227,8 +226,6 @@ ALOHA: aloha creates 10 routines in  0.323 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h
@@ -244,23 +241,22 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
 Hunk #2 succeeded at 268 (offset 48 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.936s
-user	0m3.657s
-sys	0m0.271s
+real	0m4.091s
+user	0m3.635s
+sys	0m0.277s
 Code generation completed in 4 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
index 5a80864c2c..6b86df6911 100644
--- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0056858062744140625 [0m
+[1;32mDEBUG: model prefixing  takes 0.005385398864746094 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.163 s
+1 processes with 123 diagrams generated in 0.159 s
 Total: 1 processes with 123 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg
 Load PLUGIN.CUDACPP_OUTPUT
@@ -169,23 +169,23 @@ INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ g g @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.426 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.333 s
+ALOHA: aloha creates 5 routines in  0.321 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -200,8 +200,6 @@ ALOHA: aloha creates 5 routines in  0.333 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h
@@ -210,7 +208,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
 quit
 
-real	0m1.499s
-user	0m1.430s
-sys	0m0.060s
-Code generation completed in 1 seconds
+real	0m1.563s
+user	0m1.367s
+sys	0m0.070s
+Code generation completed in 2 seconds
diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
index 71e8a6eff9..83072da01b 100644
--- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005856752395629883 [0m
+[1;32mDEBUG: model prefixing  takes 0.005414009094238281 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.946 s
+1 processes with 1240 diagrams generated in 1.900 s
 Total: 1 processes with 1240 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -179,8 +179,7 @@ INFO: Processing color information for process: g g > t t~ g g g @1
 INFO: Creating files in directory P1_gg_ttxggg 
 INFO: Computing Color-Flow optimization [15120 term] 
 INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fb37f02bc10> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -189,32 +188,32 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 
0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 
739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1626][0m [0m
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.788 s
-Wrote files for 2281 helas calls in 19.161 s
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1545][0m [0m
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.624 s
+Wrote files for 2281 helas calls in 18.662 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.333 s
+ALOHA: aloha creates 5 routines in  0.322 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.379 s
+ALOHA: aloha creates 10 routines in  0.360 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -229,8 +228,6 @@ ALOHA: aloha creates 10 routines in  0.379 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h
@@ -246,24 +243,23 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
 Hunk #2 succeeded at 332 (offset 112 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m33.818s
-user	0m33.213s
-sys	0m0.496s
-Code generation completed in 34 seconds
+real	0m32.948s
+user	0m32.377s
+sys	0m0.464s
+Code generation completed in 33 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
index 3b48b57384..c7461f51b6 100644
--- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005753278732299805 [0m
+[1;32mDEBUG: model prefixing  takes 0.0055277347564697266 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.962 s
+1 processes with 1240 diagrams generated in 1.878 s
 Total: 1 processes with 1240 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg
 Load PLUGIN.CUDACPP_OUTPUT
@@ -169,23 +169,23 @@ INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 
 INFO: Processing color information for process: g g > t t~ g g g @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.855 s
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.764 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.364 s
+ALOHA: aloha creates 5 routines in  0.351 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -200,8 +200,6 @@ ALOHA: aloha creates 5 routines in  0.364 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h
@@ -210,7 +208,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
 quit
 
-real	0m13.568s
-user	0m13.380s
+real	0m13.357s
+user	0m13.042s
 sys	0m0.131s
 Code generation completed in 13 seconds
diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
index 74a5555444..14e43c4006 100644
--- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
@@ -61,7 +61,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0056743621826171875 [0m
+[1;32mDEBUG: model prefixing  takes 0.005688667297363281 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.081 s
+8 processes with 40 diagrams generated in 0.079 s
 Total: 8 processes with 40 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -200,8 +200,7 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~
 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Creating files in directory P1_gu_ttxu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f794f66ab20> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -210,19 +209,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxu 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P1_gux_ttxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f794f49d370> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -231,26 +229,26 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 1 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxux 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1545][0m [0m
 Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s
-Wrote files for 32 helas calls in 0.255 s
+Wrote files for 32 helas calls in 0.236 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.152 s
+ALOHA: aloha creates 2 routines in  0.145 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.140 s
+ALOHA: aloha creates 4 routines in  0.134 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -258,8 +256,6 @@ ALOHA: aloha creates 4 routines in  0.140 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h
@@ -275,7 +271,6 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
@@ -291,17 +286,17 @@ patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
 Hunk #2 succeeded at 246 (offset 26 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.337s
-user	0m2.045s
-sys	0m0.286s
-Code generation completed in 3 seconds
+real	0m2.670s
+user	0m1.976s
+sys	0m0.299s
+Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
index fd260607af..3b7a18826e 100644
--- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
@@ -61,7 +61,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005650043487548828 [0m
+[1;32mDEBUG: model prefixing  takes 0.00568699836730957 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -192,20 +192,20 @@ INFO: Processing color information for process: g u~ > t t~ u~ @1
 INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=1 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=1 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc
@@ -214,7 +214,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.150 s
+ALOHA: aloha creates 2 routines in  0.146 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -222,8 +222,6 @@ ALOHA: aloha creates 2 routines in  0.150 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h
@@ -232,7 +230,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
 quit
 
-real	0m0.680s
-user	0m0.620s
-sys	0m0.051s
-Code generation completed in 1 seconds
+real	0m2.317s
+user	0m0.597s
+sys	0m0.060s
+Code generation completed in 3 seconds
diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
index 721fa8b560..f5600aa4b9 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
@@ -149,8 +149,7 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Creating files in directory P1_gg_bbx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2ecd94bc70> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -159,30 +158,30 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_bbx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses/P1_gg_bbx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses/P1_gg_bbx [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1545][0m [0m
 Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s
-Wrote files for 12 helas calls in 0.122 s
+Wrote files for 12 helas calls in 0.114 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.277 s
+ALOHA: aloha creates 4 routines in  0.266 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 8 routines in  0.261 s
+ALOHA: aloha creates 8 routines in  0.249 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -191,8 +190,6 @@ ALOHA: aloha creates 8 routines in  0.261 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFS2
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h
 INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h
@@ -208,22 +205,21 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses/P1_gg_bbx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.240s
-user	0m1.968s
-sys	0m0.266s
+real	0m2.340s
+user	0m1.919s
+sys	0m0.257s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
index 7a3a1de366..c4aea42269 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
@@ -141,11 +141,11 @@ INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc
@@ -156,7 +156,7 @@ ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.271 s
+ALOHA: aloha creates 4 routines in  0.268 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -165,8 +165,6 @@ ALOHA: aloha creates 4 routines in  0.271 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFS2
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h
 INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h
@@ -175,7 +173,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
 quit
 
-real	0m0.673s
-user	0m0.602s
-sys	0m0.058s
+real	0m1.034s
+user	0m0.591s
+sys	0m0.057s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
index fe9270d9f7..a169e46fe3 100644
--- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
+++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
@@ -61,7 +61,7 @@ set zerowidth_tchannel F
 define j = p
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005668163299560547 [0m
+[1;32mDEBUG: model prefixing  takes 0.0056304931640625 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~
 INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ 
 INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ 
 INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ 
-5 processes with 7 diagrams generated in 0.031 s
+5 processes with 7 diagrams generated in 0.030 s
 Total: 5 processes with 7 diagrams
 add process p p > t t~ j @1
 INFO: Checking for minimal orders which gives processes. 
@@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~
 INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g 
 INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ 
 INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g 
-13 processes with 76 diagrams generated in 0.142 s
+13 processes with 76 diagrams generated in 0.138 s
 Total: 18 processes with 83 diagrams
 add process p p > t t~ j j @2
 INFO: Checking for minimal orders which gives processes. 
@@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~
 INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ 
 INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ 
 INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.879 s
+65 processes with 1119 diagrams generated in 1.828 s
 Total: 83 processes with 1202 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -499,8 +499,7 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED
 INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 
 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 
 INFO: Creating files in directory P2_gg_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f920155a700> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -509,19 +508,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_gg_ttxuux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201815490> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -530,19 +528,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 1 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_gu_ttxgu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201bec4f0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -551,19 +548,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 2 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_gux_ttxgux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201815490> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -572,19 +568,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 3 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_uux_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f920155a5b0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -593,19 +588,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 4 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P1_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201bec4f0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -614,19 +608,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 5 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_uu_ttxuu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f920183ad60> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -635,19 +628,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 6 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_uux_ttxuux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f920182a9a0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -656,19 +648,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 7 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_uxux_ttxuxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201bec4f0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -677,19 +668,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 8 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_uc_ttxuc 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f92018513d0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -698,19 +688,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 9 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_uux_ttxccx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f92017f7bb0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -719,19 +708,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 10 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_ucx_ttxucx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f92017fdb80> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -740,19 +728,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 11 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P2_uxcx_ttxuxcx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201bec4f0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -761,19 +748,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 12 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P1_gu_ttxu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201812df0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -782,19 +768,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 13 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxu 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P1_gux_ttxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201bec4f0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -803,19 +788,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 14 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxux 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P1_uux_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201c5eaf0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -824,19 +808,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 15 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxg 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P0_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201c5eaf0> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -845,19 +828,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 16 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1545][0m [0m
 INFO: Creating files in directory P0_uux_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f9201851b20> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -866,32 +848,32 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 17 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1626][0m [0m
-Generated helas calls for 18 subprocesses (372 diagrams) in 1.336 s
-Wrote files for 810 helas calls in 3.622 s
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1545][0m [0m
+Generated helas calls for 18 subprocesses (372 diagrams) in 1.296 s
+Wrote files for 810 helas calls in 3.405 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.347 s
+ALOHA: aloha creates 5 routines in  0.340 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.326 s
+ALOHA: aloha creates 10 routines in  0.320 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -906,8 +888,6 @@ ALOHA: aloha creates 10 routines in  0.326 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h
@@ -923,7 +903,6 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
@@ -1044,17 +1023,17 @@ patching file driver.f
 patching file matrix1.f
 Hunk #1 succeeded at 75 (offset 3 lines).
 Hunk #2 succeeded at 271 (offset 51 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m11.543s
-user	0m10.575s
-sys	0m0.930s
-Code generation completed in 12 seconds
+real	0m14.186s
+user	0m10.192s
+sys	0m0.908s
+Code generation completed in 14 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
index 7d5ba7f16f..04f8799925 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
@@ -77,7 +77,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.14253568649291992 [0m
+[1;32mDEBUG: model prefixing  takes 0.14439702033996582 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.838 s
+1 processes with 72 diagrams generated in 3.810 s
 Total: 1 processes with 72 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -114,8 +114,7 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ t t~ @1 
 INFO: Creating files in directory P1_gg_ttxttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fee1df5d100> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -124,32 +123,32 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 0, 69, 70] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses/P1_gg_ttxttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1626][0m [0m
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s
-Wrote files for 119 helas calls in 0.445 s
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses/P1_gg_ttxttx [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1545][0m [0m
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.189 s
+Wrote files for 119 helas calls in 0.429 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.331 s
+ALOHA: aloha creates 5 routines in  0.324 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 10 routines in  0.344 s
+ALOHA: aloha creates 10 routines in  0.335 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -161,8 +160,6 @@ ALOHA: aloha creates 10 routines in  0.344 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV10
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
 INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -178,23 +175,22 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses/P1_gg_ttxttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
 Hunk #2 succeeded at 268 (offset 48 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m7.445s
-user	0m7.114s
-sys	0m0.296s
+real	0m7.467s
+user	0m7.032s
+sys	0m0.290s
 Code generation completed in 7 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
index 0b61fdbda6..6df47f4f95 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
@@ -77,7 +77,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.14379262924194336 [0m
+[1;32mDEBUG: model prefixing  takes 0.139725923538208 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.829 s
+1 processes with 72 diagrams generated in 3.795 s
 Total: 1 processes with 72 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -106,23 +106,23 @@ INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ t t~ @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. 
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.186 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.331 s
+ALOHA: aloha creates 5 routines in  0.319 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -134,8 +134,6 @@ ALOHA: aloha creates 5 routines in  0.331 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV10
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
 INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -144,7 +142,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
 quit
 
-real	0m5.290s
-user	0m5.191s
-sys	0m0.065s
+real	0m5.822s
+user	0m5.091s
+sys	0m0.086s
 Code generation completed in 6 seconds
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
index e4b5a44402..ccd20f71a0 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.127 s
+1 processes with 6 diagrams generated in 0.125 s
 Total: 1 processes with 6 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -576,8 +576,7 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t1 t1~ @1 
 INFO: Creating files in directory P1_gg_t1t1x 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fcd0c7c1a90> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -586,28 +585,28 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [0, 1, 2, 3, 4, 5] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1545][0m [0m
 Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s
-Wrote files for 16 helas calls in 0.128 s
+Wrote files for 16 helas calls in 0.118 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.191 s
+ALOHA: aloha creates 3 routines in  0.185 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 6 routines in  0.188 s
+ALOHA: aloha creates 6 routines in  0.183 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
@@ -615,8 +614,6 @@ ALOHA: aloha creates 6 routines in  0.188 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVSS1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h
@@ -632,23 +629,22 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
 Hunk #2 succeeded at 208 (offset -12 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.094s
-user	0m2.788s
-sys	0m0.304s
+real	0m3.359s
+user	0m2.722s
+sys	0m0.296s
 Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
index c00973accb..5784fbb717 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.128 s
+1 processes with 6 diagrams generated in 0.125 s
 Total: 1 processes with 6 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1
 Load PLUGIN.CUDACPP_OUTPUT
@@ -568,11 +568,11 @@ INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t1 t1~ @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc
@@ -582,7 +582,7 @@ ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.190 s
+ALOHA: aloha creates 3 routines in  0.187 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
@@ -590,8 +590,6 @@ ALOHA: aloha creates 3 routines in  0.190 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVSS1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h
@@ -600,7 +598,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
 quit
 
-real	0m1.376s
-user	0m1.301s
-sys	0m0.068s
+real	0m1.632s
+user	0m1.277s
+sys	0m0.075s
 Code generation completed in 2 seconds
diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
index e6ba72c45f..c34ce44772 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.124 s
+1 processes with 3 diagrams generated in 0.119 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -576,8 +576,7 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7ff06d078c40> [1;30m[export_v4.py at line 6261][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -586,34 +585,32 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 [1;32mDEBUG:  config_map = [0m [1, 2, 3] [1;30m[export_cpp.py at line 711][0m [0m
 [1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
 [1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1871][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
+[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6438][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6444][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1520][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1544][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1545][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.121 s
+Wrote files for 10 helas calls in 0.111 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.141 s
+ALOHA: aloha creates 2 routines in  0.506 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.140 s
+ALOHA: aloha creates 4 routines in  0.136 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h
@@ -629,23 +626,22 @@ INFO: Generate jpeg diagrams
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
 patching file Source/genps.inc
-patching file Source/makefile
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
 patching file auto_dsig1.f
 patching file driver.f
 patching file matrix1.f
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 242][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.986s
-user	0m2.651s
-sys	0m0.300s
-Code generation completed in 3 seconds
+real	0m4.546s
+user	0m2.601s
+sys	0m0.292s
+Code generation completed in 4 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
index 9662488371..536bf0e536 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
@@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.126 s
+1 processes with 3 diagrams generated in 0.120 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -568,11 +568,11 @@ INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 198][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 199][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 200][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 209][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 210][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 211][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 212][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 213][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc
@@ -581,15 +581,13 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.447 s
+ALOHA: aloha creates 2 routines in  0.142 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h
@@ -598,7 +596,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
 quit
 
-real	0m1.844s
-user	0m1.263s
+real	0m1.602s
+user	0m1.221s
 sys	0m0.069s
 Code generation completed in 2 seconds

From d8bb2ca6f026553c3032f76e83d1302aa67abb2d Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Mon, 2 Sep 2024 18:25:36 +0200
Subject: [PATCH 39/50] [helas] regenerate gg_tt.mad, check all is ok

---
 .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt       | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
index 3c9665b806..c87deb69b9 100644
--- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005519866943359375 [0m
+[1;32mDEBUG: model prefixing  takes 0.005797863006591797 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.008 s
+1 processes with 3 diagrams generated in 0.009 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -177,7 +177,7 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -193,25 +193,27 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
 [1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6444][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1520][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1544][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1545][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1601][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1625][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1626][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.109 s
+Wrote files for 10 helas calls in 0.110 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.149 s
+ALOHA: aloha creates 2 routines in  0.150 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.134 s
+ALOHA: aloha creates 4 routines in  0.138 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h
@@ -239,9 +241,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.133s
-user	0m1.651s
-sys	0m0.279s
+real	0m2.143s
+user	0m1.859s
+sys	0m0.281s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *

From a9a93bbc7bcb4b705787210db21b327cfbb0e071 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 20 Sep 2024 11:33:20 +0200
Subject: [PATCH 40/50] [helas] move to upstream/master tput/tmad logs for
 easier merging

git checkout upstream/master tput/logs_* tmad/logs_*
---
 .../log_eemumu_mad_d_inl0_hrd0.txt            | 236 ++++++++--------
 .../log_eemumu_mad_f_inl0_hrd0.txt            | 242 +++++++++--------
 .../log_eemumu_mad_m_inl0_hrd0.txt            | 246 +++++++++--------
 .../log_ggtt_mad_d_inl0_hrd0.txt              | 238 ++++++++--------
 .../log_ggtt_mad_f_inl0_hrd0.txt              | 242 +++++++++--------
 .../log_ggtt_mad_m_inl0_hrd0.txt              | 236 ++++++++--------
 .../log_ggttg_mad_d_inl0_hrd0.txt             | 244 +++++++++--------
 .../log_ggttg_mad_f_inl0_hrd0.txt             | 242 +++++++++--------
 .../log_ggttg_mad_m_inl0_hrd0.txt             | 250 +++++++++--------
 .../log_ggttgg_mad_d_inl0_hrd0.txt            | 252 +++++++++--------
 .../log_ggttgg_mad_f_inl0_hrd0.txt            | 253 +++++++++--------
 .../log_ggttgg_mad_m_inl0_hrd0.txt            | 252 +++++++++--------
 .../log_ggttggg_mad_d_inl0_hrd0.txt           | 250 +++++++++--------
 .../log_ggttggg_mad_f_inl0_hrd0.txt           | 252 +++++++++--------
 .../log_ggttggg_mad_m_inl0_hrd0.txt           | 254 ++++++++---------
 .../log_gqttq_mad_d_inl0_hrd0.txt             | 254 ++++++++---------
 .../log_gqttq_mad_f_inl0_hrd0.txt             | 248 +++++++++--------
 .../log_gqttq_mad_m_inl0_hrd0.txt             | 254 ++++++++---------
 .../log_heftggbb_mad_d_inl0_hrd0.txt          | 240 ++++++++--------
 .../log_heftggbb_mad_f_inl0_hrd0.txt          |  88 +++---
 .../log_heftggbb_mad_m_inl0_hrd0.txt          | 253 +++++++++--------
 .../log_smeftggtttt_mad_d_inl0_hrd0.txt       | 248 +++++++++--------
 .../log_smeftggtttt_mad_f_inl0_hrd0.txt       | 250 +++++++++--------
 .../log_smeftggtttt_mad_m_inl0_hrd0.txt       | 254 ++++++++---------
 .../log_susyggt1t1_mad_d_inl0_hrd0.txt        | 248 +++++++++--------
 .../log_susyggt1t1_mad_f_inl0_hrd0.txt        | 252 +++++++++--------
 .../log_susyggt1t1_mad_m_inl0_hrd0.txt        | 240 ++++++++--------
 .../log_susyggtt_mad_d_inl0_hrd0.txt          | 246 +++++++++--------
 .../log_susyggtt_mad_f_inl0_hrd0.txt          | 256 +++++++++---------
 .../log_susyggtt_mad_m_inl0_hrd0.txt          | 252 +++++++++--------
 .../log_eemumu_mad_d_inl0_hrd0.txt            | 128 +++++----
 .../log_eemumu_mad_d_inl0_hrd0_bridge.txt     | 134 +++++----
 .../log_eemumu_mad_d_inl0_hrd0_common.txt     | 128 +++++----
 .../log_eemumu_mad_d_inl0_hrd0_curhst.txt     | 128 +++++----
 .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt     | 128 +++++----
 .../log_eemumu_mad_d_inl0_hrd1.txt            | 128 +++++----
 .../log_eemumu_mad_d_inl1_hrd0.txt            | 128 +++++----
 .../log_eemumu_mad_d_inl1_hrd1.txt            | 128 +++++----
 .../log_eemumu_mad_f_inl0_hrd0.txt            | 130 +++++----
 .../log_eemumu_mad_f_inl0_hrd0_bridge.txt     | 136 ++++++----
 .../log_eemumu_mad_f_inl0_hrd0_common.txt     | 130 +++++----
 .../log_eemumu_mad_f_inl0_hrd0_curhst.txt     | 130 +++++----
 .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt     | 130 +++++----
 .../log_eemumu_mad_f_inl0_hrd1.txt            | 130 +++++----
 .../log_eemumu_mad_f_inl1_hrd0.txt            | 130 +++++----
 .../log_eemumu_mad_f_inl1_hrd1.txt            | 130 +++++----
 .../log_eemumu_mad_m_inl0_hrd0.txt            | 128 +++++----
 .../log_eemumu_mad_m_inl0_hrd1.txt            | 128 +++++----
 .../log_ggtt_mad_d_inl0_hrd0.txt              | 130 +++++----
 .../log_ggtt_mad_d_inl0_hrd0_bridge.txt       | 136 ++++++----
 .../log_ggtt_mad_d_inl0_hrd0_common.txt       | 130 +++++----
 .../log_ggtt_mad_d_inl0_hrd0_curhst.txt       | 130 +++++----
 .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt       | 130 +++++----
 .../log_ggtt_mad_d_inl0_hrd1.txt              | 132 +++++----
 .../log_ggtt_mad_d_inl1_hrd0.txt              | 128 +++++----
 .../log_ggtt_mad_d_inl1_hrd1.txt              | 130 +++++----
 .../log_ggtt_mad_f_inl0_hrd0.txt              | 132 +++++----
 .../log_ggtt_mad_f_inl0_hrd0_bridge.txt       | 138 ++++++----
 .../log_ggtt_mad_f_inl0_hrd0_common.txt       | 132 +++++----
 .../log_ggtt_mad_f_inl0_hrd0_curhst.txt       | 132 +++++----
 .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt       | 132 +++++----
 .../log_ggtt_mad_f_inl0_hrd1.txt              | 132 +++++----
 .../log_ggtt_mad_f_inl1_hrd0.txt              | 142 +++++-----
 .../log_ggtt_mad_f_inl1_hrd1.txt              | 138 +++++-----
 .../log_ggtt_mad_m_inl0_hrd0.txt              | 128 +++++----
 .../log_ggtt_mad_m_inl0_hrd1.txt              | 132 +++++----
 .../log_ggttg_mad_d_inl0_hrd0.txt             | 140 +++++-----
 .../log_ggttg_mad_d_inl0_hrd0_bridge.txt      | 147 +++++-----
 .../log_ggttg_mad_d_inl0_hrd1.txt             | 140 +++++-----
 .../log_ggttg_mad_f_inl0_hrd0.txt             | 144 +++++-----
 .../log_ggttg_mad_f_inl0_hrd0_bridge.txt      | 151 ++++++-----
 .../log_ggttg_mad_f_inl0_hrd1.txt             | 142 +++++-----
 .../log_ggttg_mad_m_inl0_hrd0.txt             | 142 +++++-----
 .../log_ggttg_mad_m_inl0_hrd1.txt             | 142 +++++-----
 .../log_ggttgg_mad_d_inl0_hrd0.txt            | 144 +++++-----
 .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt     | 151 ++++++-----
 .../log_ggttgg_mad_d_inl0_hrd0_common.txt     | 144 +++++-----
 .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt     | 144 +++++-----
 .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt     | 144 +++++-----
 .../log_ggttgg_mad_d_inl0_hrd1.txt            | 144 +++++-----
 .../log_ggttgg_mad_d_inl1_hrd0.txt            | 144 +++++-----
 .../log_ggttgg_mad_d_inl1_hrd1.txt            | 148 +++++-----
 .../log_ggttgg_mad_f_inl0_hrd0.txt            | 144 +++++-----
 .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt     | 151 ++++++-----
 .../log_ggttgg_mad_f_inl0_hrd0_common.txt     | 144 +++++-----
 .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt     | 144 +++++-----
 .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt     | 144 +++++-----
 .../log_ggttgg_mad_f_inl0_hrd1.txt            | 144 +++++-----
 .../log_ggttgg_mad_f_inl1_hrd0.txt            | 148 +++++-----
 .../log_ggttgg_mad_f_inl1_hrd1.txt            | 148 +++++-----
 .../log_ggttgg_mad_m_inl0_hrd0.txt            | 144 +++++-----
 .../log_ggttgg_mad_m_inl0_hrd1.txt            | 144 +++++-----
 .../log_ggttggg_mad_d_inl0_hrd0.txt           | 175 ++++++------
 .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt    | 182 +++++++------
 .../log_ggttggg_mad_d_inl0_hrd1.txt           | 175 ++++++------
 .../log_ggttggg_mad_f_inl0_hrd0.txt           | 175 ++++++------
 .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt    | 182 +++++++------
 .../log_ggttggg_mad_f_inl0_hrd1.txt           | 175 ++++++------
 .../log_ggttggg_mad_m_inl0_hrd0.txt           | 175 ++++++------
 .../log_ggttggg_mad_m_inl0_hrd1.txt           | 175 ++++++------
 .../log_gqttq_mad_d_inl0_hrd0.txt             | 140 +++++-----
 .../log_gqttq_mad_d_inl0_hrd0_bridge.txt      | 147 +++++-----
 .../log_gqttq_mad_d_inl0_hrd1.txt             | 140 +++++-----
 .../log_gqttq_mad_f_inl0_hrd0.txt             | 144 +++++-----
 .../log_gqttq_mad_f_inl0_hrd0_bridge.txt      | 151 ++++++-----
 .../log_gqttq_mad_f_inl0_hrd1.txt             | 144 +++++-----
 .../log_gqttq_mad_m_inl0_hrd0.txt             | 140 +++++-----
 .../log_gqttq_mad_m_inl0_hrd1.txt             | 140 +++++-----
 .../log_heftggbb_mad_d_inl0_hrd0.txt          | 130 +++++----
 .../log_heftggbb_mad_d_inl0_hrd1.txt          | 130 +++++----
 .../log_heftggbb_mad_f_inl0_hrd0.txt          | 130 +++++----
 .../log_heftggbb_mad_f_inl0_hrd1.txt          | 130 +++++----
 .../log_heftggbb_mad_m_inl0_hrd0.txt          | 130 +++++----
 .../log_heftggbb_mad_m_inl0_hrd1.txt          | 130 +++++----
 .../log_smeftggtttt_mad_d_inl0_hrd0.txt       | 142 +++++-----
 .../log_smeftggtttt_mad_d_inl0_hrd1.txt       | 142 +++++-----
 .../log_smeftggtttt_mad_f_inl0_hrd0.txt       | 142 +++++-----
 .../log_smeftggtttt_mad_f_inl0_hrd1.txt       | 142 +++++-----
 .../log_smeftggtttt_mad_m_inl0_hrd0.txt       | 142 +++++-----
 .../log_smeftggtttt_mad_m_inl0_hrd1.txt       | 142 +++++-----
 .../log_susyggt1t1_mad_d_inl0_hrd0.txt        | 132 +++++----
 .../log_susyggt1t1_mad_d_inl0_hrd1.txt        | 130 +++++----
 .../log_susyggt1t1_mad_f_inl0_hrd0.txt        | 132 +++++----
 .../log_susyggt1t1_mad_f_inl0_hrd1.txt        | 132 +++++----
 .../log_susyggt1t1_mad_m_inl0_hrd0.txt        | 132 +++++----
 .../log_susyggt1t1_mad_m_inl0_hrd1.txt        | 130 +++++----
 .../log_susyggtt_mad_d_inl0_hrd0.txt          | 130 +++++----
 .../log_susyggtt_mad_d_inl0_hrd1.txt          | 132 +++++----
 .../log_susyggtt_mad_f_inl0_hrd0.txt          | 132 +++++----
 .../log_susyggtt_mad_f_inl0_hrd1.txt          | 132 +++++----
 .../log_susyggtt_mad_m_inl0_hrd0.txt          | 128 +++++----
 .../log_susyggtt_mad_m_inl0_hrd1.txt          | 132 +++++----
 132 files changed, 11613 insertions(+), 9907 deletions(-)

diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
index 905d729f9f..a32be077f9 100644
--- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
@@ -1,10 +1,10 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
+
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:51:22
+DATE: 2024-09-18_13:40:30
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 3798 events (found 8192 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7770s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7689s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0082s for     8192 events => throughput is 1.00E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 3837 events (found 8192 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7474s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7399s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0075s for     8192 events => throughput is 1.09E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1876s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1795s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0082s for     8192 events => throughput is 1.01E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2197s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2120s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0076s for     8192 events => throughput is 1.07E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3850s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2996s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0854s for    90112 events => throughput is 1.06E+06 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7224s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6476s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0748s for    81920 events => throughput is 1.10E+06 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1832s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1756s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0072s for     8192 events => throughput is 1.13E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2211s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2136s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0071s for     8192 events => throughput is 1.15E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661545E-002) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3761s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2962s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0795s for    90112 events => throughput is 1.13E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7334s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6611s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0720s for    81920 events => throughput is 1.14E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.150373e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.150298e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.153459e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.170213e+06                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1814s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1765s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0046s for     8192 events => throughput is 1.80E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2169s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2122s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0044s for     8192 events => throughput is 1.87E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3434s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2950s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0480s for    90112 events => throughput is 1.88E+06 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7002s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6558s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0441s for    81920 events => throughput is 1.86E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.890693e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.910014e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.963061e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.998657e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1825s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1788s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2169s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2132s
  [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0034s for     8192 events => throughput is 2.40E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3319s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2956s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0359s for    90112 events => throughput is 2.51E+06 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6805s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6468s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0335s for    81920 events => throughput is 2.45E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.565379e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.599120e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.720140e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.638604e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1800s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1763s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0034s for     8192 events => throughput is 2.39E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2150s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2114s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0033s for     8192 events => throughput is 2.46E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3314s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2959s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0352s for    90112 events => throughput is 2.56E+06 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6827s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6499s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0325s for    81920 events => throughput is 2.52E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.605905e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.664038e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.811741e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.739981e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1801s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1754s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0042s for     8192 events => throughput is 1.93E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2192s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2148s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0041s for     8192 events => throughput is 2.02E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3433s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2992s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0437s for    90112 events => throughput is 2.06E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6880s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6476s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0401s for    81920 events => throughput is 2.05E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519892E-002) differ by less than 3E-14 (0.0)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.041894e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.061679e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.231255e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.165474e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6180s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6168s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.42E+07 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6554s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6518s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.67E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715404661532E-002) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7402s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7344s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0051s for    90112 events => throughput is 1.75E+07 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.0937s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.0854s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0078s for    81920 events => throughput is 1.06E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103909519892E-002) differ by less than 3E-14 (0.0)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.310247e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.180467e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.937972e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.444487e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.121321e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.131686e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.494743e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.605423e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.121828e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.162553e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.011369e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.757987e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.127939e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.185886e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.127590e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.069069e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
index b3dd9fc681..d760c23b34 100644
--- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
@@ -1,13 +1,13 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
 
 make USEBUILDDIR=1 BACKEND=cppsse4
-make USEBUILDDIR=1 BACKEND=cppavx2
 
+
+make USEBUILDDIR=1 BACKEND=cppavx2
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:51:39
+DATE: 2024-09-18_13:40:49
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 3798 events (found 8192 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7299s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7218s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0081s for     8192 events => throughput is 1.01E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 3837 events (found 8192 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7432s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7356s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0075s for     8192 events => throughput is 1.09E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1894s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1815s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0079s for     8192 events => throughput is 1.04E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2222s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2147s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0076s for     8192 events => throughput is 1.08E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3829s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2975s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0854s for    90112 events => throughput is 1.05E+06 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7385s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6605s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0780s for    81920 events => throughput is 1.05E+06 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1854s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1780s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0071s for     8192 events => throughput is 1.16E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.09243 [9.2432777382586498E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2258s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2183s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0072s for     8192 events => throughput is 1.13E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382703205998396E-002) differ by less than 4E-4 (1.306308462512007e-07)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432777382586498E-002) differ by less than 4E-4 (1.305336294610271e-07)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3735s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2979s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0753s for    90112 events => throughput is 1.20E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.09171 [9.1711091925143637E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7135s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6453s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0680s for    81920 events => throughput is 1.20E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515590123565249E-002) differ by less than 4E-4 (1.2999352305698153e-07)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711091925143637E-002) differ by less than 4E-4 (1.3067530257870885e-07)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.222498e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.221041e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.223515e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.228624e+06                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1791s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1762s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.02E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432774839452045E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2147s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2117s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.95E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700723828302E-002) differ by less than 4E-4 (1.5721146218172777e-07)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774839452045E-002) differ by less than 4E-4 (1.5804696607002455e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3264s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2967s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0295s for    90112 events => throughput is 3.06E+06 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711089416628339E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6780s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6503s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0275s for    81920 events => throughput is 2.98E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587612890761E-002) differ by less than 4E-4 (1.5742791048545257e-07)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089416628339E-002) differ by less than 4E-4 (1.5802766439865223e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.184263e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.101743e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.305557e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.221746e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1821s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1792s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0026s for     8192 events => throughput is 3.14E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2192s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2165s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0025s for     8192 events => throughput is 3.25E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700679354239E-002) differ by less than 4E-4 (1.576877179942926e-07)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3282s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3003s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0277s for    90112 events => throughput is 3.25E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6719s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6467s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0250s for    81920 events => throughput is 3.28E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002) differ by less than 4E-4 (1.573566908996682e-07)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.324945e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.474277e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.611979e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.584047e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1823s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1796s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0025s for     8192 events => throughput is 3.31E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2174s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2146s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0025s for     8192 events => throughput is 3.25E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700679354239E-002) differ by less than 4E-4 (1.576877179942926e-07)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3274s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3004s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0268s for    90112 events => throughput is 3.37E+06 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6748s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6505s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0241s for    81920 events => throughput is 3.40E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002) differ by less than 4E-4 (1.573566908996682e-07)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.421728e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.456987e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.720848e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.708350e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1835s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1805s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.08E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.09243 [9.2432778556608516E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2238s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2208s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.95E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382704335459282E-002) differ by less than 4E-4 (1.1853587900123586e-07)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432778556608516E-002) differ by less than 4E-4 (1.1783227071848756e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3317s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3033s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0282s for    90112 events => throughput is 3.20E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.09171 [9.1711093118690828E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6837s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6578s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0257s for    81920 events => throughput is 3.19E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515591296252558E-002) differ by less than 4E-4 (1.1717945325173673e-07)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711093118690828E-002) differ by less than 4E-4 (1.1766109664357316e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.234507e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.378249e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.458576e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.571882e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6162s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6151s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.46E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.09243 [9.2432780016531851E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6559s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6524s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0030s for     8192 events => throughput is 2.77E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382706077425631E-002) differ by less than 4E-4 (9.988182347875352e-08)
+OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432780016531851E-002) differ by less than 4E-4 (1.0203783951112655e-07)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7317s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7264s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0047s for    90112 events => throughput is 1.91E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.09171 [9.1711094767039689E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.0956s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.0874s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0076s for    81920 events => throughput is 1.07E+07 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515592892887687E-002) differ by less than 4E-4 (9.973286385633884e-08)
+OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711094767039689E-002) differ by less than 4E-4 (9.968782199720749e-08)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.706504e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.223914e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.545522e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.489581e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.466129e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.046619e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.910413e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.917172e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.590166e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.064257e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.100934e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.895930e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.844973e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.656385e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.803087e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.636564e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
index 6dc1fb2130..3678e8e364 100644
--- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
@@ -1,27 +1,27 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
+
+
 make USEBUILDDIR=1 BACKEND=cppnone
 
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
-
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
-make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:51:54
+DATE: 2024-09-18_13:41:08
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 3798 events (found 8192 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7125s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7045s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0081s for     8192 events => throughput is 1.02E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 3837 events (found 8192 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7466s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7391s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0075s for     8192 events => throughput is 1.09E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1842s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1761s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0081s for     8192 events => throughput is 1.01E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2178s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2101s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0077s for     8192 events => throughput is 1.06E+06 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3846s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2988s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0858s for    90112 events => throughput is 1.05E+06 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7260s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6506s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0754s for    81920 events => throughput is 1.09E+06 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1833s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1754s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0075s for     8192 events => throughput is 1.09E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2203s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2127s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0073s for     8192 events => throughput is 1.13E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701395E-002) differ by less than 2E-4 (1.7176482458580722e-10)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3781s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2967s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0811s for    90112 events => throughput is 1.11E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7363s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6616s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0744s for    81920 events => throughput is 1.10E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002) differ by less than 2E-4 (1.4292744765498355e-10)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.135905e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.133764e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.146239e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.147681e+06                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1797s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1749s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0044s for     8192 events => throughput is 1.86E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2195s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2148s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0044s for     8192 events => throughput is 1.88E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.7176438049659737e-10)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3400s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2935s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0462s for    90112 events => throughput is 1.95E+06 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6934s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6496s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0435s for    81920 events => throughput is 1.88E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002) differ by less than 2E-4 (1.4292744765498355e-10)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.989312e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.996644e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.057240e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.048925e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1786s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1748s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0035s for     8192 events => throughput is 2.37E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2177s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2140s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0034s for     8192 events => throughput is 2.44E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3305s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2941s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0360s for    90112 events => throughput is 2.51E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6893s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6561s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0329s for    81920 events => throughput is 2.49E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.520248e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.590922e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.584398e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.665063e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1789s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1753s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0033s for     8192 events => throughput is 2.51E+06 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2169s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2133s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0033s for     8192 events => throughput is 2.50E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3318s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2962s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0352s for    90112 events => throughput is 2.56E+06 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6867s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6530s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0334s for    81920 events => throughput is 2.45E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.641406e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.604540e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.794868e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.589532e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.1799s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.1755s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0040s for     8192 events => throughput is 2.05E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.2176s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2134s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0039s for     8192 events => throughput is 2.12E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10)
+OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3390s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2970s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0415s for    90112 events => throughput is 2.17E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6977s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6595s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0378s for    81920 events => throughput is 2.17E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11)
+OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.196903e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.201898e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.277543e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.266955e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1591 events (found 1595 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6049s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6037s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.47E+07 events/s
+ [XSECTION] Cross section = 0.09243 [9.2432789437826970E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1589 events (found 1593 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6541s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6503s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0032s for     8192 events => throughput is 2.59E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715392009194E-002) differ by less than 2E-4 (1.3548906441229747e-10)
+OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789437826970E-002) differ by less than 2E-4 (1.1194101201539297e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1782 events (found 1787 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7291s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7235s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0050s for    90112 events => throughput is 1.80E+07 events/s
+ [XSECTION] Cross section = 0.09171 [9.1711103901050417E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 1655 events (found 1660 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.0910s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.0820s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0084s for    81920 events => throughput is 9.79E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602021089631E-002) differ by less than 2E-4 (1.1898038110302878e-11)
+OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.348676e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.081337e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.930073e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.286137e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.125050e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.251289e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.498988e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.774363e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.104130e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.280291e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.086621e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.840047e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.108048e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.258897e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.158026e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.124478e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
index 931fcf1c66..21d2f45edf 100644
--- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
@@ -1,20 +1,20 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
 
 make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
-make USEBUILDDIR=1 BACKEND=cppavx2
 
+
+make USEBUILDDIR=1 BACKEND=cppavx2
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:52:11
+DATE: 2024-09-18_13:41:27
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
- [UNWEIGHT] Wrote 2601 events (found 5405 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8200s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7778s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0421s for     8192 events => throughput is 1.94E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0
+ [UNWEIGHT] Wrote 2613 events (found 5374 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8485s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8070s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0415s for     8192 events => throughput is 1.97E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4177s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3756s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0421s for     8192 events => throughput is 1.95E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4498s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4077s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0420s for     8192 events => throughput is 1.95E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7786s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3175s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4612s for    90112 events => throughput is 1.95E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9562s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5349s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4212s for    81920 events => throughput is 1.94E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4176s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3724s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0448s for     8192 events => throughput is 1.83E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4516s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4062s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0450s for     8192 events => throughput is 1.82E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8262s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3290s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4967s for    90112 events => throughput is 1.81E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9866s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5393s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4468s for    81920 events => throughput is 1.83E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989099) differ by less than 3E-14 (3.3306690738754696e-16)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.846425e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.851914e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.873611e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.872591e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3985s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3726s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0255s for     8192 events => throughput is 3.21E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4375s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4102s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0269s for     8192 events => throughput is 3.04E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6129s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3312s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2813s for    90112 events => throughput is 3.20E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7905s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5386s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2515s for    81920 events => throughput is 3.26E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989106) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.265834e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.305635e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.263600e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.371218e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3886s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3723s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0158s for     8192 events => throughput is 5.18E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4200s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4039s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0158s for     8192 events => throughput is 5.19E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4959s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3212s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1743s for    90112 events => throughput is 5.17E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7022s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5439s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1579s for    81920 events => throughput is 5.19E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989135) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.224711e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.172913e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.211640e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.264385e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3872s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3723s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0144s for     8192 events => throughput is 5.68E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4192s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4041s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0147s for     8192 events => throughput is 5.59E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4782s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3196s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1582s for    90112 events => throughput is 5.70E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6806s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5367s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1435s for    81920 events => throughput is 5.71E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989135) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.751977e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.786323e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.909222e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.850142e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3958s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3729s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0224s for     8192 events => throughput is 3.66E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 47.14 [47.138611968034169] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4275s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4043s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0228s for     8192 events => throughput is 3.60E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034169) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5779s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3255s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2519s for    90112 events => throughput is 3.58E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7739s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5476s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2259s for    81920 events => throughput is 3.63E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989135) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.640250e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.522776e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.582672e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.620788e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8092s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8078s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.31E+07 events/s
+ [XSECTION] Cross section = 47.14 [47.138611968034176] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8511s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8471s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0032s for     8192 events => throughput is 2.57E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cuda (47.094184803756640) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (47.138611968034162) and cuda (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7627s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7552s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0068s for    90112 events => throughput is 1.33E+07 events/s
+ [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9977s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9877s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0093s for    81920 events => throughput is 8.83E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cuda (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (47.144596232268185) and cuda (47.144596232268178) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.145115e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.921444e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.615882e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.230318e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.253648e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.714613e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.083832e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.316499e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.252346e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.728492e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.158233e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.598150e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.242136e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.745533e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.076942e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.694862e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
index 7b5a930bcd..0850891597 100644
--- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
@@ -14,16 +14,16 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
-make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:52:37
+DATE: 2024-09-18_13:41:56
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
- [UNWEIGHT] Wrote 2601 events (found 5405 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8269s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7846s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0423s for     8192 events => throughput is 1.94E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0
+ [UNWEIGHT] Wrote 2613 events (found 5374 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8368s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7947s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0421s for     8192 events => throughput is 1.94E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4157s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3732s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0425s for     8192 events => throughput is 1.93E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4509s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4076s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0434s for     8192 events => throughput is 1.89E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7811s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3167s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4645s for    90112 events => throughput is 1.94E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9677s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5472s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4205s for    81920 events => throughput is 1.95E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094179692708323] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4144s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3720s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0420s for     8192 events => throughput is 1.95E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 47.14 [47.138606099989779] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4454s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4032s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0418s for     8192 events => throughput is 1.96E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094179692708323) differ by less than 4E-4 (1.0852822573959031e-07)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138606099989779) differ by less than 4E-4 (1.2448487851646206e-07)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105688388783328] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7840s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3184s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4652s for    90112 events => throughput is 1.94E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 47.14 [47.144592707001024] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9812s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5583s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4226s for    81920 events => throughput is 1.94E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105688388783328) differ by less than 4E-4 (1.462924120732012e-07)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144592707001024) differ by less than 4E-4 (7.477563590541081e-08)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.954584e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.959611e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.971648e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.959548e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094175707109216] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3908s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3733s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0173s for     8192 events => throughput is 4.75E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138602111070696] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4226s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4048s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0175s for     8192 events => throughput is 4.69E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094175707109216) differ by less than 4E-4 (1.9315861321533845e-07)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138602111070696) differ by less than 4E-4 (2.091059336795098e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105684583433771] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5105s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3203s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1899s for    90112 events => throughput is 4.75E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 47.14 [47.144588828412729] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7203s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5477s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1724s for    81920 events => throughput is 4.75E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105684583433771) differ by less than 4E-4 (2.2707562807866566e-07)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144588828412729) differ by less than 4E-4 (1.570456860111591e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.743399e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.738872e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.751002e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.748017e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3840s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3746s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0090s for     8192 events => throughput is 9.10E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4166s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4074s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0089s for     8192 events => throughput is 9.16E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094173726920275) differ by less than 4E-4 (2.3520603253945893e-07)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4196s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3180s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1013s for    90112 events => throughput is 8.89E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6374s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5454s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0918s for    81920 events => throughput is 8.93E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105684037363524) differ by less than 4E-4 (2.386680745258829e-07)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.122881e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.120680e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.253637e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.193326e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3836s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3748s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0086s for     8192 events => throughput is 9.55E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4176s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4084s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0089s for     8192 events => throughput is 9.23E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094173726920275) differ by less than 4E-4 (2.3520603253945893e-07)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4130s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3183s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0944s for    90112 events => throughput is 9.54E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6345s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5478s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0864s for    81920 events => throughput is 9.48E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105684037363524) differ by less than 4E-4 (2.386680745258829e-07)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.492627e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.788116e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.845068e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.789950e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094178448427996] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3872s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3747s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0121s for     8192 events => throughput is 6.75E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 47.14 [47.138606840950104] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4189s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4068s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0118s for     8192 events => throughput is 6.95E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094178448427996) differ by less than 4E-4 (1.3494932904478674e-07)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138606840950104) differ by less than 4E-4 (1.0876612277499476e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105688391432061] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4540s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3208s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1329s for    90112 events => throughput is 6.78E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144591429357156] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6732s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5506s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1222s for    81920 events => throughput is 6.70E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105688391432061) differ by less than 4E-4 (1.462361824966507e-07)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144591429357156) differ by less than 4E-4 (1.0187617272006122e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.760706e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.765000e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.862105e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.898629e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184162782994] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8072s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8059s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.43E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 47.14 [47.138612402172164] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8533s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8496s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.65E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cuda (47.094184162782994) differ by less than 4E-4 (1.3610462645807786e-08)
+OK! xsec from fortran (47.138611968034162) and cuda (47.138612402172164) differ by less than 4E-4 (9.209817353195149e-09)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105694501043516] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7625s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7557s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0061s for    90112 events => throughput is 1.49E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 47.14 [47.144596666727985] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9917s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9825s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0085s for    81920 events => throughput is 9.59E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cuda (47.105694501043516) differ by less than 4E-4 (1.6536123581545326e-08)
+OK! xsec from fortran (47.144596232268185) and cuda (47.144596666727985) differ by less than 4E-4 (9.215473939505614e-09)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.122844e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.139565e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.186028e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.535095e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.914924e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.504949e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.415260e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.302031e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.864435e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.479596e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.542103e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.319419e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.479715e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.230042e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.411070e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.720607e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
index 424c9d3f7b..1cd7f5e3d4 100644
--- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
@@ -1,10 +1,10 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 
-make USEBUILDDIR=1 BACKEND=cuda
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:53:03
+DATE: 2024-09-18_13:42:23
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
- [UNWEIGHT] Wrote 2601 events (found 5405 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8254s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7822s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0432s for     8192 events => throughput is 1.90E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0
+ [UNWEIGHT] Wrote 2613 events (found 5374 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8437s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8020s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0417s for     8192 events => throughput is 1.96E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4149s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3727s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0422s for     8192 events => throughput is 1.94E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4457s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4043s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0414s for     8192 events => throughput is 1.98E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7857s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3213s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4644s for    90112 events => throughput is 1.94E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9802s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5584s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4219s for    81920 events => throughput is 1.94E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4173s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3713s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0455s for     8192 events => throughput is 1.80E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 47.14 [47.138613306947967] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4569s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4109s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0456s for     8192 events => throughput is 1.79E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947967) differ by less than 2E-4 (2.8403759566586473e-08)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8209s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3186s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5019s for    90112 events => throughput is 1.80E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 47.14 [47.144597573367548] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0058s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5525s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4529s for    81920 events => throughput is 1.81E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006634) differ by less than 2E-4 (2.865932691165085e-08)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367548) differ by less than 2E-4 (2.8446512922997158e-08)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.821084e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.833075e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.828762e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.846422e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3975s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3723s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0247s for     8192 events => throughput is 3.32E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4330s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4080s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0246s for     8192 events => throughput is 3.33E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947953) differ by less than 2E-4 (2.8403759344541868e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5914s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3179s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2731s for    90112 events => throughput is 3.30E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144597573367555] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.8037s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5546s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2487s for    81920 events => throughput is 3.29E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006626) differ by less than 2E-4 (2.8659326689606246e-08)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367555) differ by less than 2E-4 (2.8446512922997158e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.312704e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.318121e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.281482e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.355189e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3879s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3719s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0156s for     8192 events => throughput is 5.26E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4204s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4041s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0159s for     8192 events => throughput is 5.15E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4909s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3179s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1726s for    90112 events => throughput is 5.22E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7037s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5502s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1532s for    81920 events => throughput is 5.35E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.294208e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.280868e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.336642e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.319511e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3881s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3733s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0143s for     8192 events => throughput is 5.73E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4169s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4022s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0144s for     8192 events => throughput is 5.71E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4759s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3196s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1559s for    90112 events => throughput is 5.78E+05 events/s
+ [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6742s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5312s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1426s for    81920 events => throughput is 5.74E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.832546e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.827488e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.943697e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.962674e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3971s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3744s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0223s for     8192 events => throughput is 3.68E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4277s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4052s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0221s for     8192 events => throughput is 3.71E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08)
+OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5696s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3242s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2449s for    90112 events => throughput is 3.68E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7709s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5477s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2228s for    81920 events => throughput is 3.68E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08)
+OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.610993e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.662408e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.604644e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.633047e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8101s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8086s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.25E+07 events/s
+ [XSECTION] Cross section = 47.14 [47.138611963547788] fbridge_mode=1
+ [UNWEIGHT] Wrote 1618 events (found 1623 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8496s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8458s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.64E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.094184803756640) and cuda (47.094184798437830) differ by less than 2E-4 (1.1293987967064822e-10)
+OK! xsec from fortran (47.138611968034162) and cuda (47.138611963547788) differ by less than 2E-4 (9.517409083059647e-11)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1
- [UNWEIGHT] Wrote 1744 events (found 1749 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7563s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7487s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0067s for    90112 events => throughput is 1.34E+07 events/s
+ [XSECTION] Cross section = 47.14 [47.144596232269095] fbridge_mode=1
+ [UNWEIGHT] Wrote 1613 events (found 1618 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9898s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9799s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0091s for    81920 events => throughput is 8.98E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (47.105695279989114) and cuda (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11)
+OK! xsec from fortran (47.144596232268185) and cuda (47.144596232269095) differ by less than 2E-4 (1.9317880628477724e-14)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.016383e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.961867e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.577646e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.402195e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.254487e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.751023e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.057057e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.487612e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.262558e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.767038e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.142842e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.725223e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.258563e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.748403e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.974623e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.694986e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
index 9853411ba7..652edcf84f 100644
--- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
@@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g
 
 
 make USEBUILDDIR=1 BACKEND=cuda
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
@@ -13,8 +13,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
-
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:53:30
+DATE: 2024-09-18_13:42:52
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
- [UNWEIGHT] Wrote 365 events (found 1496 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7011s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3682s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3329s for     8192 events => throughput is 2.46E+04 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 387 events (found 1591 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7493s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4200s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3293s for     8192 events => throughput is 2.49E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6659s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3318s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3341s for     8192 events => throughput is 2.45E+04 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7150s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3834s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3316s for     8192 events => throughput is 2.47E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.2088s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5545s
- [COUNTERS] Fortran MEs      ( 1 ) :    3.6542s for    90112 events => throughput is 2.47E+04 events/s
+ [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    5.1953s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8899s
+ [COUNTERS] Fortran MEs      ( 1 ) :    3.3054s for    81920 events => throughput is 2.48E+04 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6836s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3335s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3488s for     8192 events => throughput is 2.35E+04 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7369s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3879s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3477s for     8192 events => throughput is 2.36E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.4042s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5693s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    3.8337s for    90112 events => throughput is 2.35E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
+ [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    5.3596s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8952s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    3.4633s for    81920 events => throughput is 2.37E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.438451e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.471888e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.447901e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.456119e+04                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.5127s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3301s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1819s for     8192 events => throughput is 4.50E+04 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.5680s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3869s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1804s for     8192 events => throughput is 4.54E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607748863) differ by less than 3E-14 (2.453592884421596e-14)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748567E-002) differ by less than 3E-14 (0.0)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    3.5729s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5688s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.0033s for    90112 events => throughput is 4.50E+04 events/s
+ [XSECTION] Cross section = 0.07997 [7.9971656827279650E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    3.6931s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8866s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.8056s for    81920 events => throughput is 4.54E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279650E-002) differ by less than 3E-14 (4.440892098500626e-16)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.534029e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.653702e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.539640e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.694721e+04                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4296s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3368s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0922s for     8192 events => throughput is 8.89E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4803s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3892s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0905s for     8192 events => throughput is 9.05E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.5686s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5656s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0024s for    90112 events => throughput is 8.99E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.8119s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9071s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9041s for    81920 events => throughput is 9.06E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.162844e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.368531e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.203130e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.254727e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4127s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3310s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0811s for     8192 events => throughput is 1.01E+05 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4668s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3854s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0809s for     8192 events => throughput is 1.01E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4702s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5733s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8963s for    90112 events => throughput is 1.01E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6912s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8823s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8083s for    81920 events => throughput is 1.01E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.036026e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.047750e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.036765e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.045236e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4471s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3305s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1158s for     8192 events => throughput is 7.07E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748581E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.5013s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3866s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1140s for     8192 events => throughput is 7.19E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748581E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.8416s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5733s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2675s for    90112 events => throughput is 7.11E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    3.0360s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8971s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1383s for    81920 events => throughput is 7.20E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.157314e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.160157e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.179221e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.320569e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7661s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7572s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0060s for     8192 events => throughput is 1.36E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8352s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8227s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0094s for     8192 events => throughput is 8.68E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748607749111) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0255s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9988s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0237s for    90112 events => throughput is 3.80E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
+ [XSECTION] Cross section = 0.07997 [7.9971656827279636E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.3543s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.3252s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0261s for    81920 events => throughput is 3.14E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0031s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481932717736E-002) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656827279636E-002) differ by less than 3E-14 (4.440892098500626e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.629113e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.134986e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.206213e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.475726e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.922280e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.339604e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.239743e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.161734e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.941872e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.354476e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.249673e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.170951e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.946587e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.318892e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.755614e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.662470e+06                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
index 7af52fe973..3362abfbc9 100644
--- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:54:12
+DATE: 2024-09-18_13:43:35
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
- [UNWEIGHT] Wrote 365 events (found 1496 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6937s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3614s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3324s for     8192 events => throughput is 2.46E+04 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 387 events (found 1591 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7388s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4086s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3301s for     8192 events => throughput is 2.48E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6601s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3286s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3315s for     8192 events => throughput is 2.47E+04 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7145s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3839s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3307s for     8192 events => throughput is 2.48E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.2320s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5618s
- [COUNTERS] Fortran MEs      ( 1 ) :    3.6702s for    90112 events => throughput is 2.46E+04 events/s
+ [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    5.1779s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8749s
+ [COUNTERS] Fortran MEs      ( 1 ) :    3.3030s for    81920 events => throughput is 2.48E+04 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112722616246457] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6650s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3303s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3336s for     8192 events => throughput is 2.46E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
+ [XSECTION] Cross section = 0.07847 [7.8471473453718410E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7223s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3896s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3317s for     8192 events => throughput is 2.47E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112722616246457) differ by less than 4E-4 (2.570171934723753e-06)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471473453718410E-002) differ by less than 4E-4 (1.574588530672827e-07)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238468293717765E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.2417s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5684s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    3.6724s for    90112 events => throughput is 2.45E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
+ [XSECTION] Cross section = 0.07997 [7.9971643267110940E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    5.2162s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8886s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    3.3267s for    81920 events => throughput is 2.46E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238468293717765E-002) differ by less than 4E-4 (1.721259623721494e-07)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971643267110940E-002) differ by less than 4E-4 (1.69562182517069e-07)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.532359e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.539712e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.532906e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.530008e+04                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112720694019242] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4525s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3399s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1120s for     8192 events => throughput is 7.31E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.07847 [7.8471459294758378E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4889s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3868s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1015s for     8192 events => throughput is 8.07E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112720694019242) differ by less than 4E-4 (2.760251535116609e-06)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459294758378E-002) differ by less than 4E-4 (3.37893311330717e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238454783817719E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6896s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5677s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1213s for    90112 events => throughput is 8.04E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.07997 [7.9971629726281482E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.9019s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8838s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0176s for    81920 events => throughput is 8.05E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238454783817719E-002) differ by less than 4E-4 (3.4262266690454624e-07)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629726281482E-002) differ by less than 4E-4 (3.38882539141494e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.197742e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.124328e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.182250e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.199350e+04                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3765s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3295s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0466s for     8192 events => throughput is 1.76E+05 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4313s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3842s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0467s for     8192 events => throughput is 1.75E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721757974454) differ by less than 4E-4 (2.655042234289695e-06)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0876s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5731s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5140s for    90112 events => throughput is 1.75E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.3507s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8840s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4663s for    81920 events => throughput is 1.76E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453732924513E-002) differ by less than 4E-4 (3.558850765195132e-07)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.789700e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.796536e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.781504e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.786454e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3742s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3309s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0429s for     8192 events => throughput is 1.91E+05 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4292s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3860s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0428s for     8192 events => throughput is 1.91E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721757974454) differ by less than 4E-4 (2.655042234289695e-06)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0350s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5670s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4675s for    90112 events => throughput is 1.93E+05 events/s
+ [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.3290s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9023s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4263s for    81920 events => throughput is 1.92E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453732924513E-002) differ by less than 4E-4 (3.558850765195132e-07)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.973486e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.981352e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.975490e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.997761e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112723389095883] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3896s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3323s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0567s for     8192 events => throughput is 1.44E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.07847 [7.8471471932611128E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4486s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3925s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0556s for     8192 events => throughput is 1.47E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112723389095883) differ by less than 4E-4 (2.493748653908945e-06)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471471932611128E-002) differ by less than 4E-4 (1.768430569759616e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238464413054557E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.1882s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5687s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6189s for    90112 events => throughput is 1.46E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.07997 [7.9971639934306102E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.4713s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9066s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5643s for    81920 events => throughput is 1.45E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238464413054557E-002) differ by less than 4E-4 (2.2110043929046697e-07)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971639934306102E-002) differ by less than 4E-4 (2.1123700788550082e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.454568e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.458815e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.469296e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.487461e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112725654777677] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7619s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7598s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0010s for     8192 events => throughput is 8.23E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
+ [XSECTION] Cross section = 0.07847 [7.8471475012321185E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8337s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8291s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0035s for     8192 events => throughput is 2.31E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cuda (0.10112725654777677) differ by less than 4E-4 (2.269706518509551e-06)
+OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471475012321185E-002) differ by less than 4E-4 (1.375968260441951e-07)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238470908598507E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0010s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9891s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0107s for    90112 events => throughput is 8.40E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
+ [XSECTION] Cross section = 0.07997 [7.9971648932322295E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.3421s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.3278s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0132s for    81920 events => throughput is 6.21E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238470908598507E-002) differ by less than 4E-4 (1.3912582552677577e-07)
+OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971648932322295E-002) differ by less than 4E-4 (9.872194262072753e-08)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.172109e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.709678e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.550537e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.936833e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.551280e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.247414e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.686947e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.199841e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.549100e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.195768e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.728979e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.278448e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.413968e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.108387e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.281798e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.230857e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
index 17f42d4ffa..4de53c2d38 100644
--- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
@@ -1,10 +1,10 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
-make USEBUILDDIR=1 BACKEND=cppnone
 
+
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:54:50
+DATE: 2024-09-18_13:44:15
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
- [UNWEIGHT] Wrote 365 events (found 1496 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6937s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3622s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3315s for     8192 events => throughput is 2.47E+04 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 387 events (found 1591 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7391s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4081s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3310s for     8192 events => throughput is 2.48E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6619s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3285s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.3334s for     8192 events => throughput is 2.46E+04 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7122s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3840s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.3281s for     8192 events => throughput is 2.50E+04 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.2139s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5585s
- [COUNTERS] Fortran MEs      ( 1 ) :    3.6553s for    90112 events => throughput is 2.47E+04 events/s
+ [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    5.1945s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8920s
+ [COUNTERS] Fortran MEs      ( 1 ) :    3.3025s for    81920 events => throughput is 2.48E+04 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6848s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3306s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3530s for     8192 events => throughput is 2.32E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
+ [XSECTION] Cross section = 0.07847 [7.8471486590207584E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7363s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3850s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3501s for     8192 events => throughput is 2.34E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700702684) differ by less than 2E-4 (9.191721828116783e-09)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486590207584E-002) differ by less than 2E-4 (9.945765766516956e-09)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    5.4616s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5794s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    3.8810s for    90112 events => throughput is 2.32E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
+ [XSECTION] Cross section = 0.07997 [7.9971657589635384E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    5.4502s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8950s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    3.5541s for    81920 events => throughput is 2.30E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482679400354E-002) differ by less than 2E-4 (9.423232416594374e-09)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589635384E-002) differ by less than 2E-4 (9.532824529756567e-09)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.409533e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.405336e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.396518e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.411690e+04                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.5127s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3310s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1808s for     8192 events => throughput is 4.53E+04 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471486540430027E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.5672s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3874s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1790s for     8192 events => throughput is 4.58E+04 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748702805033) differ by less than 2E-4 (9.399612865834683e-09)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486540430027E-002) differ by less than 2E-4 (9.311426296676473e-09)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    3.5681s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5715s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.9958s for    90112 events => throughput is 4.52E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 0.07997 [7.9971657589963913E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    3.6943s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8846s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.8089s for    81920 events => throughput is 4.53E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482683055667E-002) differ by less than 2E-4 (9.469362849401364e-09)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589963913E-002) differ by less than 2E-4 (9.536932576992285e-09)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.663293e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.686401e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.697752e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.704142e+04                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4226s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3320s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0899s for     8192 events => throughput is 9.11E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4765s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3867s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0892s for     8192 events => throughput is 9.18E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.5611s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5711s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9894s for    90112 events => throughput is 9.11E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.7937s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8977s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8954s for    81920 events => throughput is 9.15E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.338563e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.255440e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.283232e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.362786e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4112s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3315s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0791s for     8192 events => throughput is 1.04E+05 events/s
+ [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4681s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3893s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0782s for     8192 events => throughput is 1.05E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4528s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5718s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8803s for    90112 events => throughput is 1.02E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6895s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8933s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.7956s for    81920 events => throughput is 1.03E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.055357e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.053887e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.047729e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.065947e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4511s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3308s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1195s for     8192 events => throughput is 6.86E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 0.07847 [7.8471486537749241E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.5056s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3870s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1179s for     8192 events => throughput is 6.95E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700265108) differ by less than 2E-4 (9.148451995955043e-09)
+OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486537749241E-002) differ by less than 2E-4 (9.277263846030337e-09)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.8840s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5752s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.3080s for    90112 events => throughput is 6.89E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 0.07997 [7.9971657565670345E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    3.0551s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8834s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1711s for    81920 events => throughput is 7.00E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482666076374E-002) differ by less than 2E-4 (9.255082034087536e-09)
+OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657565670345E-002) differ by less than 2E-4 (9.233155351395794e-09)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.986026e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.918098e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.961269e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.013587e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1
- [UNWEIGHT] Wrote 386 events (found 1179 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7685s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7595s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0060s for     8192 events => throughput is 1.37E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
+ [XSECTION] Cross section = 0.07847 [7.8471485791426987E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 376 events (found 1358 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8394s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8269s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0094s for     8192 events => throughput is 8.67E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0031s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748601943165) differ by less than 2E-4 (5.74121417074025e-10)
+OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485791426987E-002) differ by less than 2E-4 (2.334807902570901e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 32/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1
- [UNWEIGHT] Wrote 1898 events (found 1903 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0160s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9892s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0237s for    90112 events => throughput is 3.80E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
+ [XSECTION] Cross section = 0.07997 [7.9971656830583548E-002] fbridge_mode=1
+ [UNWEIGHT] Wrote 2267 events (found 2272 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.3560s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.3268s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0262s for    81920 events => throughput is 3.12E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0031s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481937154381E-002) differ by less than 2E-4 (5.5991211667105745e-11)
+OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656830583548E-002) differ by less than 2E-4 (4.131384123695625e-11)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.643017e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.114701e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.890572e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.463889e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.862432e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.291446e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.232251e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.155947e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.923478e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.267147e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.245240e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.165743e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.915356e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.239896e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.729398e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.648131e+06                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
index 72b9dd250a..da4192a0d3 100644
--- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
-make USEBUILDDIR=1 BACKEND=cppsse4
 
+
+make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:55:32
+DATE: 2024-09-18_13:44:59
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
- [UNWEIGHT] Wrote 11 events (found 187 events)
- [COUNTERS] PROGRAM TOTAL          :    4.6116s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2616s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.3500s for     8192 events => throughput is 1.88E+03 events/s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0
+ [UNWEIGHT] Wrote 7 events (found 223 events)
+ [COUNTERS] PROGRAM TOTAL          :    4.6910s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3764s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.3146s for     8192 events => throughput is 1.90E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.5881s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2535s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.3346s for     8192 events => throughput is 1.89E+03 events/s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    4.5801s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2959s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.2843s for     8192 events => throughput is 1.91E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   49.6135s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8559s
- [COUNTERS] Fortran MEs      ( 1 ) :   47.7576s for    90112 events => throughput is 1.89E+03 events/s
+ [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   45.0141s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0696s
+ [COUNTERS] Fortran MEs      ( 1 ) :   42.9445s for    81920 events => throughput is 1.91E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.7506s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2515s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    4.4898s for     8192 events => throughput is 1.82E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0093s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    4.7546s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3011s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    4.4446s for     8192 events => throughput is 1.84E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0090s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556621222236) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   51.2369s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8206s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   49.4072s for    90112 events => throughput is 1.82E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0092s
+ [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   46.6278s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0620s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   44.5568s for    81920 events => throughput is 1.84E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0089s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083266099799) differ by less than 3E-14 (7.771561172376096e-16)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.887013e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.897024e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.886305e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.894466e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6725s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2519s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4156s for     8192 events => throughput is 3.39E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0050s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6753s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2991s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.3715s for     8192 events => throughput is 3.45E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0047s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556621222236) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083266099785] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   28.2511s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8187s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   26.4276s for    90112 events => throughput is 3.41E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0049s
+ [XSECTION] Cross section = 0.2093 [0.20930257969248325] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   25.8378s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0749s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   23.7582s for    81920 events => throughput is 3.45E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0048s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083266099785) differ by less than 3E-14 (1.3322676295501878e-15)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248325) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.532551e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.534769e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.540511e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.572800e+03                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.2920s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2501s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0393s for     8192 events => throughput is 7.88E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0025s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.3401s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2956s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0420s for     8192 events => throughput is 7.86E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0024s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556621222231) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   13.2584s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8186s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   11.4371s for    90112 events => throughput is 7.88E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0027s
+ [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   12.4720s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0723s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   10.3972s for    81920 events => throughput is 7.88E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0025s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083266099799) differ by less than 3E-14 (7.771561172376096e-16)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.101118e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.115646e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.086189e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.135568e+03                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.1797s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2518s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9256s for     8192 events => throughput is 8.85E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0023s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.2200s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2987s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9191s for     8192 events => throughput is 8.91E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0022s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556621222231) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   12.0258s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8174s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   10.2060s for    90112 events => throughput is 8.83E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0024s
+ [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   11.2276s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0702s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    9.1552s for    81920 events => throughput is 8.95E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0022s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083266099799) differ by less than 3E-14 (7.771561172376096e-16)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.177055e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.261139e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.175263e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.284539e+03                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4405s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2500s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1875s for     8192 events => throughput is 6.90E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.4788s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2959s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1801s for     8192 events => throughput is 6.94E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0027s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556621222231) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   14.9495s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8217s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   13.1249s for    90112 events => throughput is 6.87E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
+ [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   13.8631s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0851s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   11.7752s for    81920 events => throughput is 6.96E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0028s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083266099799) differ by less than 3E-14 (7.771561172376096e-16)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.981801e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.044735e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.965066e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.087685e+03                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222225] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7575s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6866s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0363s for     8192 events => throughput is 2.25E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0346s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8126s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7388s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0390s for     8192 events => throughput is 2.10E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0348s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cuda (0.46320556621222225) differ by less than 3E-14 (3.3306690738754696e-16)
+OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083266099782] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6470s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.2462s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3664s for    90112 events => throughput is 2.46E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0345s
+ [XSECTION] Cross section = 0.2093 [0.20930257969248336] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.9095s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.5487s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3260s for    81920 events => throughput is 2.51E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0348s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cuda (0.22558083266099782) differ by less than 3E-14 (1.4432899320127035e-15)
+OK! xsec from fortran (0.20930257969248323) and cuda (0.20930257969248336) differ by less than 3E-14 (6.661338147750939e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.276910e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.150288e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.501526e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.340464e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.128345e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.120076e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.149669e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.169270e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.128670e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.124208e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.167581e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.169177e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.106037e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.120876e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.450094e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.432039e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
index 1eb1b6bf73..d51442efc8 100644
--- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
@@ -1,9 +1,9 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 
-make USEBUILDDIR=1 BACKEND=cuda
 
 
 
+make USEBUILDDIR=1 BACKEND=cuda
 make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
@@ -18,8 +18,6 @@ make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
-make[1]: Nothing to be done for 'all'.
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
@@ -29,10 +27,12 @@ make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Nothing to be done for 'all'.
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_00:59:36
+DATE: 2024-09-18_13:48:49
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
- [UNWEIGHT] Wrote 11 events (found 187 events)
- [COUNTERS] PROGRAM TOTAL          :    4.6113s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2567s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.3546s for     8192 events => throughput is 1.88E+03 events/s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0
+ [UNWEIGHT] Wrote 7 events (found 223 events)
+ [COUNTERS] PROGRAM TOTAL          :    4.5851s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2964s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.2887s for     8192 events => throughput is 1.91E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.5970s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2531s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.3439s for     8192 events => throughput is 1.89E+03 events/s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    4.5902s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2920s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.2983s for     8192 events => throughput is 1.91E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   49.6113s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8558s
- [COUNTERS] Fortran MEs      ( 1 ) :   47.7555s for    90112 events => throughput is 1.89E+03 events/s
+ [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   45.0593s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0904s
+ [COUNTERS] Fortran MEs      ( 1 ) :   42.9689s for    81920 events => throughput is 1.91E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320716615478996] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.6095s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2504s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    4.3503s for     8192 events => throughput is 1.88E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0087s
+ [XSECTION] Cross section = 0.3314 [0.33144941544531159] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    4.6331s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2951s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    4.3295s for     8192 events => throughput is 1.89E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0085s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320716615478996) differ by less than 4E-4 (3.4540659359372228e-06)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144941544531159) differ by less than 4E-4 (4.675947774535061e-06)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558162567940870] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   49.8390s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8197s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   48.0105s for    90112 events => throughput is 1.88E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0088s
+ [XSECTION] Cross section = 0.2093 [0.20930329135137288] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   45.4961s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0670s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   43.4203s for    81920 events => throughput is 1.89E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0087s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558162567940870) differ by less than 4E-4 (3.5154512074697664e-06)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930329135137288) differ by less than 4E-4 (3.400143900211816e-06)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +186,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.939353e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.953905e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.936979e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.953638e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +206,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320708851010073] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4544s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2505s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2012s for     8192 events => throughput is 6.82E+03 events/s
+ [XSECTION] Cross section = 0.3314 [0.33144937378275385] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.5948s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2947s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2975s for     8192 events => throughput is 6.31E+03 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0026s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320708851010073) differ by less than 4E-4 (3.2864412462529913e-06)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144937378275385) differ by less than 4E-4 (4.550249099066761e-06)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +241,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558157380141428] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   15.0990s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8254s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   13.2710s for    90112 events => throughput is 6.79E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0026s
+ [XSECTION] Cross section = 0.2093 [0.20930324959819654] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   14.0680s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0718s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   11.9937s for    81920 events => throughput is 6.83E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0025s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558157380141428) differ by less than 4E-4 (3.2854760192435606e-06)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930324959819654) differ by less than 4E-4 (3.2006567445286294e-06)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +266,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.986380e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.014424e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.983775e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.021993e+03                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +286,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7803s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2500s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5288s for     8192 events => throughput is 1.55E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
+ [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8304s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3000s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5290s for     8192 events => throughput is 1.55E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0014s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320704806184321) differ by less than 4E-4 (3.199118769003917e-06)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +321,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    7.6717s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8162s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    5.8540s for    90112 events => throughput is 1.54E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
+ [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :    7.2994s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0696s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.2285s for    81920 events => throughput is 1.57E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0013s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558158459897135) differ by less than 4E-4 (3.333341597855366e-06)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +346,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.588565e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.599753e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.580506e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.600977e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +366,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7194s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2503s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4678s for     8192 events => throughput is 1.75E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0014s
+ [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7627s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2949s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4666s for     8192 events => throughput is 1.76E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320704806184321) differ by less than 4E-4 (3.199118769003917e-06)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +401,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    6.9505s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8159s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    5.1333s for    90112 events => throughput is 1.76E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0013s
+ [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :    6.8208s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0681s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    4.7514s for    81920 events => throughput is 1.72E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558158459897135) differ by less than 4E-4 (3.333341597855366e-06)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +426,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.792140e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.808420e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.802320e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.816362e+04                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +446,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320713685871445] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8450s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2549s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5884s for     8192 events => throughput is 1.39E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
+ [XSECTION] Cross section = 0.3314 [0.33144947551388249] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8859s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2954s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5889s for     8192 events => throughput is 1.39E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320713685871445) differ by less than 4E-4 (3.390819555360025e-06)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144947551388249) differ by less than 4E-4 (4.857178601991308e-06)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +481,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558162184774774] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    8.3082s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8257s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    6.4808s for    90112 events => throughput is 1.39E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
+ [XSECTION] Cross section = 0.2093 [0.20930331717025510] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :    7.9475s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0831s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.8629s for    81920 events => throughput is 1.40E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558162184774774) differ by less than 4E-4 (3.4984654515568536e-06)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930331717025510) differ by less than 4E-4 (3.523500632152121e-06)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +506,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.385160e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.420878e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.403555e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.411954e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +526,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320719394836651] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7335s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6847s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0246s for     8192 events => throughput is 3.34E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0243s
+ [XSECTION] Cross section = 0.3314 [0.33144955535316123] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.7891s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7375s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0271s for     8192 events => throughput is 3.03E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0246s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cuda (0.46320719394836651) differ by less than 4E-4 (3.5140686183154912e-06)
+OK! xsec from fortran (0.33144786561240197) and cuda (0.33144955535316123) differ by less than 4E-4 (5.0980589545446264e-06)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +561,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558167135091578] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    2.5288s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.2506s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2539s for    90112 events => throughput is 3.55E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0243s
+ [XSECTION] Cross section = 0.2093 [0.20930336562619947] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.8062s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.5479s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2338s for    81920 events => throughput is 3.50E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0246s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cuda (0.22558167135091578) differ by less than 4E-4 (3.7179130325526444e-06)
+OK! xsec from fortran (0.20930257969248323) and cuda (0.20930336562619947) differ by less than 4E-4 (3.755012085271403e-06)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +586,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.368889e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.088372e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.732230e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.376508e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.130776e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.114154e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.306866e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.259362e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.087874e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.087087e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.214851e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.241028e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.074217e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.079549e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.388796e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.391392e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
index f01e005e58..4029a4bd08 100644
--- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
+
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -18,9 +18,9 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_01:02:50
+DATE: 2024-09-18_13:51:53
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
- [UNWEIGHT] Wrote 11 events (found 187 events)
- [COUNTERS] PROGRAM TOTAL          :    4.5981s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2571s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.3410s for     8192 events => throughput is 1.89E+03 events/s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0
+ [UNWEIGHT] Wrote 7 events (found 223 events)
+ [COUNTERS] PROGRAM TOTAL          :    4.5765s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2949s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.2815s for     8192 events => throughput is 1.91E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.5967s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2533s
- [COUNTERS] Fortran MEs      ( 1 ) :    4.3434s for     8192 events => throughput is 1.89E+03 events/s
+ [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    4.6114s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2967s
+ [COUNTERS] Fortran MEs      ( 1 ) :    4.3148s for     8192 events => throughput is 1.90E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   49.6248s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8598s
- [COUNTERS] Fortran MEs      ( 1 ) :   47.7649s for    90112 events => throughput is 1.89E+03 events/s
+ [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   45.1244s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0863s
+ [COUNTERS] Fortran MEs      ( 1 ) :   43.0382s for    81920 events => throughput is 1.90E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556893412546] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    4.7999s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2518s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    4.5389s for     8192 events => throughput is 1.80E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0092s
+ [XSECTION] Cross section = 0.3314 [0.33144786734542164] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    4.8194s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2986s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    4.5116s for     8192 events => throughput is 1.82E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0093s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556893412546) differ by less than 2E-4 (5.876231279344779e-09)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786734542164) differ by less than 2E-4 (5.228634192278037e-09)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083370546855] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   51.9774s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8235s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   50.1446s for    90112 events => throughput is 1.80E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0094s
+ [XSECTION] Cross section = 0.2093 [0.20930258048084049] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   47.3150s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0784s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   45.2275s for    81920 events => throughput is 1.81E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0091s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083370546855) differ by less than 2E-4 (4.630138050742971e-09)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258048084049) differ by less than 2E-4 (3.766591261111785e-09)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.855335e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.873631e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.862802e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.874665e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556780656974] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6423s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2504s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.3868s for     8192 events => throughput is 3.43E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0051s
+ [XSECTION] Cross section = 0.3314 [0.33144786651655289] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6800s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2994s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.3756s for     8192 events => throughput is 3.45E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0050s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556780656974) differ by less than 2E-4 (3.4419864736179306e-09)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786651655289) differ by less than 2E-4 (2.7278828085286477e-09)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083390630859] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   28.5409s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8402s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   26.6956s for    90112 events => throughput is 3.38E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0051s
+ [XSECTION] Cross section = 0.2093 [0.20930258019984904] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   25.8989s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0743s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   23.8193s for    81920 events => throughput is 3.44E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0054s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083390630859) differ by less than 2E-4 (5.520462087460487e-09)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019984904) differ by less than 2E-4 (2.424078271445751e-09)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.436169e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.519279e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.491615e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.530635e+03                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.2920s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2524s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0372s for     8192 events => throughput is 7.90E+03 events/s
+ [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.3351s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2976s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0351s for     8192 events => throughput is 7.91E+03 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0024s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556770726795) differ by less than 2E-4 (3.2276070704995163e-09)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   13.2628s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8156s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   11.4447s for    90112 events => throughput is 7.87E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0025s
+ [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   12.4644s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0722s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   10.3898s for    81920 events => throughput is 7.88E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0024s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083379720220) differ by less than 2E-4 (5.036793426782538e-09)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.072952e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.009062e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.014073e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.120880e+03                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.1675s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2513s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.9138s for     8192 events => throughput is 8.97E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0023s
+ [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.1951s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.2962s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8968s for     8192 events => throughput is 9.13E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0021s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556770726795) differ by less than 2E-4 (3.2276070704995163e-09)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   11.8988s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8240s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   10.0725s for    90112 events => throughput is 8.95E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0023s
+ [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   11.2498s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0961s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    9.1516s for    81920 events => throughput is 8.95E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0021s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083379720220) differ by less than 2E-4 (5.036793426782538e-09)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.224504e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.318473e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.269645e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.350512e+03                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4482s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.2514s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1937s for     8192 events => throughput is 6.86E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0031s
+ [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.4981s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3004s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1949s for     8192 events => throughput is 6.86E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0027s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556770726795) differ by less than 2E-4 (3.2276070704995163e-09)
+OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :   15.0050s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8226s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   13.1794s for    90112 events => throughput is 6.84E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
+ [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :   13.9738s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0733s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   11.8977s for    81920 events => throughput is 6.89E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0028s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083379720220) differ by less than 2E-4 (5.036793426782538e-09)
+OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.918159e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.010058e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.921301e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.974396e+03                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.4632 [0.46320556665261842] fbridge_mode=1
- [UNWEIGHT] Wrote 11 events (found 168 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7570s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6865s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0361s for     8192 events => throughput is 2.27E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0344s
+ [XSECTION] Cross section = 0.3314 [0.33144786533876569] fbridge_mode=1
+ [UNWEIGHT] Wrote 7 events (found 213 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8114s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7381s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0386s for     8192 events => throughput is 2.12E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0347s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.46320556621222242) and cuda (0.46320556665261842) differ by less than 2E-4 (9.507570286615419e-10)
+OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786533876569) differ by less than 2E-4 (8.255786054789382e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 104
  [XSECTION] ChannelId = 112
- [XSECTION] Cross section = 0.2256 [0.22558083224243403] fbridge_mode=1
- [UNWEIGHT] Wrote 18 events (found 294 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6619s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.2612s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3664s for    90112 events => throughput is 2.46E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0344s
+ [XSECTION] Cross section = 0.2093 [0.20930258003933860] fbridge_mode=1
+ [UNWEIGHT] Wrote 17 events (found 331 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.9001s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.5282s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3371s for    81920 events => throughput is 2.43E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0348s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.22558083266099815) and cuda (0.22558083224243403) differ by less than 2E-4 (1.855495090907766e-09)
+OK! xsec from fortran (0.20930257969248323) and cuda (0.20930258003933860) differ by less than 2E-4 (1.6571959360334176e-09)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.283925e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.156591e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.521176e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.143626e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.129933e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.122372e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.186234e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.161172e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.133957e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.154782e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.181867e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.164268e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.130752e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.117598e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.449188e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.420328e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
index 744ad57a47..b5fe53dcd6 100644
--- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
@@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g
 
 make USEBUILDDIR=1 BACKEND=cuda
 
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_01:08:23
+DATE: 2024-09-18_13:57:21
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
- [UNWEIGHT] Wrote 1 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  103.8119s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5133s
- [COUNTERS] Fortran MEs      ( 1 ) :  103.2986s for     8192 events => throughput is 7.93E+01 events/s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 1 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :  100.9942s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5376s
+ [COUNTERS] Fortran MEs      ( 1 ) :  100.4566s for     8192 events => throughput is 8.15E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  103.9562s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5117s
- [COUNTERS] Fortran MEs      ( 1 ) :  103.4445s for     8192 events => throughput is 7.92E+01 events/s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :  100.8250s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5356s
+ [COUNTERS] Fortran MEs      ( 1 ) :  100.2893s for     8192 events => throughput is 8.17E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1141.2639s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4413s
- [COUNTERS] Fortran MEs      ( 1 ) : 1136.8226s for    90112 events => throughput is 7.93E+01 events/s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          : 1009.3485s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5474s
+ [COUNTERS] Fortran MEs      ( 1 ) : 1004.8011s for    81920 events => throughput is 8.15E+01 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  124.5510s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5196s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  123.8327s for     8192 events => throughput is 6.62E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1987s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :  122.6272s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5269s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  121.8976s for     8192 events => throughput is 6.72E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2027s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939193E-006) differ by less than 3E-14 (1.5543122344752192e-15)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1400.6576s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5060s
- [COUNTERS] CudaCpp MEs      ( 2 ) : 1395.9552s for    90112 events => throughput is 6.46E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1964s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713115633775E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          : 1215.7257s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.3426s
+ [COUNTERS] CudaCpp MEs      ( 2 ) : 1211.1771s for    81920 events => throughput is 6.76E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2060s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633775E-007) differ by less than 3E-14 (1.5543122344752192e-15)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.057551e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.947835e+01                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.753971e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.953028e+01                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   62.4647s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5243s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   61.8356s for     8192 events => throughput is 1.32E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1048s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   65.0412s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5159s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   64.4246s for     8192 events => throughput is 1.27E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1007s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939197E-006) differ by less than 3E-14 (1.7763568394002505e-15)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  687.6948s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5179s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  683.0742s for    90112 events => throughput is 1.32E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1026s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  643.1942s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.3363s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  638.7557s for    81920 events => throughput is 1.28E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1021s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656017E-007) differ by less than 3E-14 (2.220446049250313e-15)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.563733e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.582676e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.550612e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.328359e+02                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   29.7924s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5177s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   29.2260s for     8192 events => throughput is 2.80E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0487s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   28.5049s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5111s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   27.9480s for     8192 events => throughput is 2.93E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0458s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  324.2440s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4975s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  319.6986s for    90112 events => throughput is 2.82E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0479s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  284.0441s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.3313s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  279.6676s for    81920 events => throughput is 2.93E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0452s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.360978e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.544656e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.361478e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.429932e+02                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   26.2312s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5190s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   25.6700s for     8192 events => throughput is 3.19E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0423s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   25.4242s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5110s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   24.8732s for     8192 events => throughput is 3.29E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0401s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  286.1989s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4892s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  281.6678s for    90112 events => throughput is 3.20E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0420s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  258.4274s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.3572s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  254.0306s for    81920 events => throughput is 3.22E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0397s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.875186e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.924332e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.884361e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.943882e+02                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   26.4167s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5180s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   25.8509s for     8192 events => throughput is 3.17E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0478s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   26.1506s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5251s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   25.5777s for     8192 events => throughput is 3.20E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0477s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  288.3192s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5257s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  283.7442s for    90112 events => throughput is 3.18E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0493s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  258.8965s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4685s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  254.3821s for    81920 events => throughput is 3.22E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0458s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.383791e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.459682e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.435116e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.450518e+02                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :    3.1981s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.0109s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.0998s for     8192 events => throughput is 7.45E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0874s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :    3.2103s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.0334s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.1011s for     8192 events => throughput is 7.44E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0758s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15)
+OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :   17.9918s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.9757s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   11.9277s for    90112 events => throughput is 7.55E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0883s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713115633791E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :   16.9165s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.9531s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   10.8819s for    81920 events => throughput is 7.53E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    1.0816s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993086656006E-007) differ by less than 3E-14 (1.7763568394002505e-15)
+OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713115633791E-007) differ by less than 3E-14 (2.220446049250313e-15)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.501572e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.508582e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.301580e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.240924e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.262889e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.270483e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.584992e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.589702e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.233679e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.287293e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.452839e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.424280e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.249444e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.273685e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.235776e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.243085e+03                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
index d6f64f4919..2a956cd657 100644
--- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
@@ -1,22 +1,22 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
 
+
 make USEBUILDDIR=1 BACKEND=cppsse4
-make USEBUILDDIR=1 BACKEND=cppavx2
 
+make USEBUILDDIR=1 BACKEND=cppavx2
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_02:33:47
+DATE: 2024-09-18_15:14:59
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
- [UNWEIGHT] Wrote 1 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  103.7942s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5071s
- [COUNTERS] Fortran MEs      ( 1 ) :  103.2871s for     8192 events => throughput is 7.93E+01 events/s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 1 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :  100.8383s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5370s
+ [COUNTERS] Fortran MEs      ( 1 ) :  100.3014s for     8192 events => throughput is 8.17E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  103.8381s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5120s
- [COUNTERS] Fortran MEs      ( 1 ) :  103.3261s for     8192 events => throughput is 7.93E+01 events/s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :  100.6800s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5304s
+ [COUNTERS] Fortran MEs      ( 1 ) :  100.1496s for     8192 events => throughput is 8.18E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1145.7081s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4432s
- [COUNTERS] Fortran MEs      ( 1 ) : 1141.2649s for    90112 events => throughput is 7.90E+01 events/s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          : 1006.5135s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5331s
+ [COUNTERS] Fortran MEs      ( 1 ) : 1001.9804s for    81920 events => throughput is 8.18E+01 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -126,22 +126,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.241e-06 [1.2405719945779552E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  115.2915s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5177s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  114.5848s for     8192 events => throughput is 7.15E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1889s
+ [XSECTION] Cross section = 2.358e-07 [2.3575849446922190E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :  113.7634s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5261s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  113.0501s for     8192 events => throughput is 7.25E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1871s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719945779552E-006) differ by less than 4E-4 (0.00013985165319851944)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575849446922190E-007) differ by less than 4E-4 (0.00013947977747852391)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -161,22 +162,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.333e-07 [2.3326290777570335E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1259.1659s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5060s
- [COUNTERS] CudaCpp MEs      ( 2 ) : 1254.4725s for    90112 events => throughput is 7.18E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1874s
+ [XSECTION] Cross section = 2.285e-07 [2.2845954405861011E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          : 1135.0851s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4478s
+ [COUNTERS] CudaCpp MEs      ( 2 ) : 1130.4514s for    81920 events => throughput is 7.25E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1858s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326290777570335E-007) differ by less than 4E-4 (0.00014139226908471692)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845954405861011E-007) differ by less than 4E-4 (0.00014189602657355138)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -185,12 +187,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.521351e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.611057e+01                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 8.544662e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.618948e+01                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -206,22 +208,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.241e-06 [1.2405716994349971E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   28.4388s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5228s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   27.8678s for     8192 events => throughput is 2.94E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0481s
+ [XSECTION] Cross section = 2.358e-07 [2.3575845178322101E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   28.2816s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5245s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   27.7095s for     8192 events => throughput is 2.96E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0476s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716994349971E-006) differ by less than 4E-4 (0.00013961371115600585)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845178322101E-007) differ by less than 4E-4 (0.0001392986940575991)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -241,22 +244,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.333e-07 [2.3326284885505778E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  310.3944s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5057s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  305.8410s for    90112 events => throughput is 2.95E+02 events/s
+ [XSECTION] Cross section = 2.285e-07 [2.2845949484525033E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  283.1165s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4533s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  278.6156s for    81920 events => throughput is 2.94E+02 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0477s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326284885505778E-007) differ by less than 4E-4 (0.0001411396400787801)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845949484525033E-007) differ by less than 4E-4 (0.00014168058211416756)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -265,12 +269,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.377580e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.409666e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.375822e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.406819e+02                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -286,22 +290,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   15.0768s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5183s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   14.5341s for     8192 events => throughput is 5.64E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0245s
+ [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   14.8123s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5276s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   14.2607s for     8192 events => throughput is 5.74E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0240s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716646933743E-006) differ by less than 4E-4 (0.00013958570271999093)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -321,22 +326,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  164.6278s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4988s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  160.1046s for    90112 events => throughput is 5.63E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0245s
+ [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  146.8484s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4408s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  142.3838s for    81920 events => throughput is 5.75E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0238s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277033163402E-007) differ by less than 4E-4 (0.00014080296191987252)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -345,12 +351,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.752482e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.847317e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.781633e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.876249e+02                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -366,22 +372,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   13.2735s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5198s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   12.7324s for     8192 events => throughput is 6.43E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0213s
+ [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   13.2590s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5224s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   12.7153s for     8192 events => throughput is 6.44E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0212s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716646933743E-006) differ by less than 4E-4 (0.00013958570271999093)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -401,22 +408,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  144.3691s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5029s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  139.8451s for    90112 events => throughput is 6.44E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0211s
+ [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  130.9182s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4397s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  126.4573s for    81920 events => throughput is 6.48E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0212s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277033163402E-007) differ by less than 4E-4 (0.00014080296191987252)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -425,12 +433,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.750549e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.748756e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.735218e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.729474e+02                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -446,22 +454,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.241e-06 [1.2405719257109645E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   13.3695s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5197s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   12.8251s for     8192 events => throughput is 6.39E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0247s
+ [XSECTION] Cross section = 2.358e-07 [2.3575850859831750E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   13.3128s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5317s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   12.7560s for     8192 events => throughput is 6.42E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0251s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719257109645E-006) differ by less than 4E-4 (0.00013979613314640815)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575850859831750E-007) differ by less than 4E-4 (0.00013953971621538663)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -481,22 +490,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.333e-07 [2.3326283665697276E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  145.6119s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5195s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  141.0684s for    90112 events => throughput is 6.39E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0239s
+ [XSECTION] Cross section = 2.285e-07 [2.2845946568145136E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  131.9849s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4720s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  127.4891s for    81920 events => throughput is 6.43E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0238s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326283665697276E-007) differ by less than 4E-4 (0.00014108733939433016)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845946568145136E-007) differ by less than 4E-4 (0.00014155290989403824)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -505,12 +515,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.855652e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.915808e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.856623e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.934421e+02                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -525,22 +535,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.241e-06 [1.2405721007137020E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :    2.1096s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.0064s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5405s for     8192 events => throughput is 1.52E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.5628s
+ [XSECTION] Cross section = 2.358e-07 [2.3575862304433055E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.1536s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.0553s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5409s for     8192 events => throughput is 1.51E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.5574s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2405721007137020E-006) differ by less than 4E-4 (0.00013993721904270728)
+OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3575862304433055E-007) differ by less than 4E-4 (0.00014002522141920437)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -559,22 +570,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.333e-07 [2.3326295421688232E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :   11.4101s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.9867s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    5.8619s for    90112 events => throughput is 1.54E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.5616s
+ [XSECTION] Cross section = 2.285e-07 [2.2845959888250639E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :   10.9466s
+ [COUNTERS] Fortran Overhead ( 0 ) :    5.0484s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.3690s for    81920 events => throughput is 1.53E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.5292s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3326295421688232E-007) differ by less than 4E-4 (0.00014159139095037965)
+OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2845959888250639E-007) differ by less than 4E-4 (0.0001421360326359089)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -583,42 +595,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.546914e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.538905e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.545489e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.534050e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.133242e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.137147e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.150716e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.187870e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.155991e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.144301e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.210272e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.161388e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.130976e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.156097e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.988585e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.971114e+03                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
index 5bfdf2922a..e04ca3f869 100644
--- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
@@ -1,13 +1,13 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 
-make USEBUILDDIR=1 BACKEND=cuda
-
 
+make USEBUILDDIR=1 BACKEND=cuda
 make USEBUILDDIR=1 BACKEND=cppnone
+
 make USEBUILDDIR=1 BACKEND=cppsse4
 
-make USEBUILDDIR=1 BACKEND=cppavx2
 
+make USEBUILDDIR=1 BACKEND=cppavx2
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_03:39:02
+DATE: 2024-09-18_16:14:25
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
- [UNWEIGHT] Wrote 1 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  104.0089s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5087s
- [COUNTERS] Fortran MEs      ( 1 ) :  103.5002s for     8192 events => throughput is 7.91E+01 events/s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 1 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :  101.0730s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5330s
+ [COUNTERS] Fortran MEs      ( 1 ) :  100.5400s for     8192 events => throughput is 8.15E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  104.2447s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5161s
- [COUNTERS] Fortran MEs      ( 1 ) :  103.7286s for     8192 events => throughput is 7.90E+01 events/s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :  100.8963s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5347s
+ [COUNTERS] Fortran MEs      ( 1 ) :  100.3617s for     8192 events => throughput is 8.16E+01 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1141.3180s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4408s
- [COUNTERS] Fortran MEs      ( 1 ) : 1136.8772s for    90112 events => throughput is 7.93E+01 events/s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          : 1008.5494s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.5481s
+ [COUNTERS] Fortran MEs      ( 1 ) : 1004.0013s for    81920 events => throughput is 8.16E+01 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :  126.1475s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5218s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  125.4139s for     8192 events => throughput is 6.53E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2118s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561678995975E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :  119.7272s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5266s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  118.9906s for     8192 events => throughput is 6.88E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2100s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985299359844E-006) differ by less than 2E-4 (5.7578810608305275e-09)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561678995975E-007) differ by less than 2E-4 (5.417890580616813e-09)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          : 1383.7655s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5039s
- [COUNTERS] CudaCpp MEs      ( 2 ) : 1379.0500s for    90112 events => throughput is 6.53E+01 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2115s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713238614534E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          : 1235.8333s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4485s
+ [COUNTERS] CudaCpp MEs      ( 2 ) : 1231.1755s for    81920 events => throughput is 6.65E+01 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.2092s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993212353001E-007) differ by less than 2E-4 (5.389404034161771e-09)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713238614534E-007) differ by less than 2E-4 (5.38380851011766e-09)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.539167e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.603593e+01                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.538387e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.563954e+01                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   65.2362s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5183s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   64.6135s for     8192 events => throughput is 1.27E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1044s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561701257335E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   64.0094s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5236s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   63.3817s for     8192 events => throughput is 1.29E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1041s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985295828471E-006) differ by less than 2E-4 (5.473184350179849e-09)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561701257335E-007) differ by less than 2E-4 (6.3622664914220195e-09)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  720.6180s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5092s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  716.0059s for    90112 events => throughput is 1.26E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1030s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713242471448E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  636.9448s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4347s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  632.4070s for    81920 events => throughput is 1.30E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.1031s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222645653E-007) differ by less than 2E-4 (5.830713245558172e-09)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713242471448E-007) differ by less than 2E-4 (5.552655002460938e-09)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.544939e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.548168e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.546626e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.548990e+02                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   28.2487s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5164s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   27.6865s for     8192 events => throughput is 2.96E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0458s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   28.2427s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5259s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   27.6712s for     8192 events => throughput is 2.96E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0456s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  306.6356s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4981s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  302.0914s for    90112 events => throughput is 2.98E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0461s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  285.3174s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4390s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  280.8328s for    81920 events => throughput is 2.92E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0456s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.532662e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.559056e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.529106e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.558709e+02                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   24.9404s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5184s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   24.3827s for     8192 events => throughput is 3.36E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0393s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   25.3100s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5275s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   24.7433s for     8192 events => throughput is 3.31E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0391s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  273.3928s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.4984s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  268.8528s for    90112 events => throughput is 3.35E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0417s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  250.0775s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4353s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  245.6035s for    81920 events => throughput is 3.34E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0387s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.133789e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.143810e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.144899e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.119192e+02                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :   25.8510s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.5190s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   25.2849s for     8192 events => throughput is 3.24E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0471s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :   25.7695s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.5251s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   25.1974s for     8192 events => throughput is 3.25E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0470s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09)
+OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :  284.5120s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.5136s
- [COUNTERS] CudaCpp MEs      ( 2 ) :  279.9519s for    90112 events => throughput is 3.22E+02 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0466s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :  262.3166s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.4708s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :  257.7997s for    81920 events => throughput is 3.18E+02 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0461s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09)
+OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.471913e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.519965e+02                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.462958e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.518227e+02                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1
- [UNWEIGHT] Wrote 70 events (found 407 events)
- [COUNTERS] PROGRAM TOTAL          :    2.7567s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.0084s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8770s for     8192 events => throughput is 9.34E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.8713s
+ [XSECTION] Cross section = 2.357e-07 [2.3572561518129465E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 18 events (found 285 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.7761s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.0249s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8762s for     8192 events => throughput is 9.35E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.8750s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985217419736E-006) differ by less than 2E-4 (8.480691704448873e-10)
+OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561518129465E-007) differ by less than 2E-4 (1.4064212017217415e-09)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 128/128
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 303 events (found 1531 events)
- [COUNTERS] PROGRAM TOTAL          :   15.3678s
- [COUNTERS] Fortran Overhead ( 0 ) :    4.9924s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    9.5033s for    90112 events => throughput is 9.48E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.8721s
+ [XSECTION] Cross section = 2.284e-07 [2.2842713109538129E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 380 events (found 1707 events)
+ [COUNTERS] PROGRAM TOTAL          :   14.4588s
+ [COUNTERS] Fortran Overhead ( 0 ) :    4.9583s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    8.6324s for    81920 events => throughput is 9.49E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.8681s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993078576733E-007) differ by less than 2E-4 (3.464063480507207e-10)
+OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713109538129E-007) differ by less than 2E-4 (2.668514298420632e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.459517e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.423002e+03                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.088954e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.078690e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.109677e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.104813e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 512 32 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.157759e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.152942e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.112863e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.106947e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.110468e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.110409e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.115265e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.106917e+04                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 ***
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.668124e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.676393e+03                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
index d5d9f39a76..13fa996bcb 100644
--- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
@@ -1,22 +1,22 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu
 
+make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cppsse4
 
 
-make USEBUILDDIR=1 BACKEND=cuda
 make USEBUILDDIR=1 BACKEND=cppnone
-make USEBUILDDIR=1 BACKEND=cppsse4
-make USEBUILDDIR=1 BACKEND=cppavx2
 
+make USEBUILDDIR=1 BACKEND=cppavx2
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_01:06:56
+DATE: 2024-09-18_13:55:45
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
- [UNWEIGHT] Wrote 404 events (found 1817 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4844s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.4104s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0740s for     8192 events => throughput is 1.11E+05 events/s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0
+ [UNWEIGHT] Wrote 506 events (found 1943 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.5290s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4573s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0716s for     8192 events => throughput is 1.14E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4098s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3373s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0726s for     8192 events => throughput is 1.13E+05 events/s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4789s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4070s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0719s for     8192 events => throughput is 1.14E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3919s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5957s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.7962s for    90112 events => throughput is 1.13E+05 events/s
+ [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.5996s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8818s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.7178s for    81920 events => throughput is 1.14E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4172s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3383s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0781s for     8192 events => throughput is 1.05E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737132] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4885s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4104s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0774s for     8192 events => throughput is 1.06E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263335) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737132) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4327s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5686s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8633s for    90112 events => throughput is 1.04E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6882s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9084s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.7791s for    81920 events => throughput is 1.05E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.067668e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.061513e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.067401e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.071810e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351262541] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3804s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3364s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0434s for     8192 events => throughput is 1.89E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737170] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4501s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4063s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0432s for     8192 events => throughput is 1.90E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351262541) differ by less than 3E-14 (2.90878432451791e-14)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737170) differ by less than 3E-14 (2.220446049250313e-15)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0534s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5637s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4891s for    90112 events => throughput is 1.84E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.211 [0.21095842877427590] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.3263s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8964s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4293s for    81920 events => throughput is 1.91E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561281) differ by less than 3E-14 (6.661338147750939e-16)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427590) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.925397e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.907415e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.917741e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.918851e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3636s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3380s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0250s for     8192 events => throughput is 3.28E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4325s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4074s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0246s for     8192 events => throughput is 3.33E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8426s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5659s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2761s for    90112 events => throughput is 3.26E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.1464s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8971s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2488s for    81920 events => throughput is 3.29E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.279688e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.325644e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.324148e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.338006e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3593s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3360s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0227s for     8192 events => throughput is 3.60E+05 events/s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4312s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4090s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0217s for     8192 events => throughput is 3.78E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8246s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5753s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2488s for    90112 events => throughput is 3.62E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.1356s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9099s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2252s for    81920 events => throughput is 3.64E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.646317e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.464520e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.693076e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.541284e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3735s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3382s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0346s for     8192 events => throughput is 2.37E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4489s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4137s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0347s for     8192 events => throughput is 2.36E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.9409s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5647s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3756s for    90112 events => throughput is 2.40E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.2441s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9069s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3367s for    81920 events => throughput is 2.43E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.391806e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.393978e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.360348e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.414185e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7677s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7657s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0008s for     8192 events => throughput is 1.08E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737173] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8542s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8498s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0032s for     8192 events => throughput is 2.55E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539351263363) differ by less than 3E-14 (1.3322676295501878e-15)
+OK! xsec from fortran (0.20313504505737126) and cuda (0.20313504505737173) differ by less than 3E-14 (2.220446049250313e-15)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0026s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9932s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0081s for    90112 events => throughput is 1.11E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
+ [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.3403s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.3289s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0103s for    81920 events => throughput is 7.96E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686556561304) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.566081e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.881632e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.981957e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.254031e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.577541e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.277303e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.550054e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.101959e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.565709e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.239939e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.833768e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.286389e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.582119e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.254121e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.784852e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.640364e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
index 7746e8ccdc..0c2abc603a 100644
--- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
@@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g
 
 make USEBUILDDIR=1 BACKEND=cuda
 
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_01:07:25
+DATE: 2024-09-18_13:56:17
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
- [UNWEIGHT] Wrote 404 events (found 1817 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4734s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.4014s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0720s for     8192 events => throughput is 1.14E+05 events/s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0
+ [UNWEIGHT] Wrote 506 events (found 1943 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.5308s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4584s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0725s for     8192 events => throughput is 1.13E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4152s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3416s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0736s for     8192 events => throughput is 1.11E+05 events/s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4871s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4149s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0721s for     8192 events => throughput is 1.14E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3818s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5842s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.7976s for    90112 events => throughput is 1.13E+05 events/s
+ [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6180s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8993s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.7187s for    81920 events => throughput is 1.14E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110463158198617] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4111s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3367s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0736s for     8192 events => throughput is 1.11E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 0.2031 [0.20313506133732837] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4837s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4096s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0735s for     8192 events => throughput is 1.11E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110463158198617) differ by less than 4E-4 (2.8104591991429118e-06)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313506133732837) differ by less than 4E-4 (8.014351782215101e-08)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686347932190] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3828s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5705s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8117s for    90112 events => throughput is 1.11E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 0.211 [0.21095842907143103] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6552s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9225s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.7321s for    81920 events => throughput is 1.12E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686347932190) differ by less than 4E-4 (9.698858494111562e-09)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842907143103) differ by less than 4E-4 (1.4085954624931674e-09)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.111352e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.127783e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.122906e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.135487e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110459183868807] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3640s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3367s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0269s for     8192 events => throughput is 3.05E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.2031 [0.20313502997679400] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4362s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4092s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0266s for     8192 events => throughput is 3.07E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110459183868807) differ by less than 4E-4 (2.9570564231695684e-06)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502997679400) differ by less than 4E-4 (7.423917058879681e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510683073685827] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8573s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5610s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2959s for    90112 events => throughput is 3.05E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 0.211 [0.21095839656505114] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.1752s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9071s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2678s for    81920 events => throughput is 3.06E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510683073685827) differ by less than 4E-4 (1.6191372875784538e-07)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839656505114) differ by less than 4E-4 (1.5268043562777223e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.037364e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.031782e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.973321e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.033396e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3493s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3354s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0135s for     8192 events => throughput is 6.08E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4208s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4072s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0133s for     8192 events => throughput is 6.15E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460727141733) differ by less than 4E-4 (2.9001312211729413e-06)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7115s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5638s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1473s for    90112 events => throughput is 6.12E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0338s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8997s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1338s for    81920 events => throughput is 6.12E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682516942223) differ by less than 4E-4 (1.8779591537398943e-07)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.189171e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.165970e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.287593e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.197191e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3474s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3347s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0123s for     8192 events => throughput is 6.66E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4217s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4091s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0123s for     8192 events => throughput is 6.65E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460727141733) differ by less than 4E-4 (2.9001312211729413e-06)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6980s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5613s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1363s for    90112 events => throughput is 6.61E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0322s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9087s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1232s for    81920 events => throughput is 6.65E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682516942223) differ by less than 4E-4 (1.8779591537398943e-07)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.705294e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.618658e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.751048e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.353403e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110464220032526] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3554s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3380s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0168s for     8192 events => throughput is 4.87E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 0.2031 [0.20313505300145301] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4238s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4063s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0172s for     8192 events => throughput is 4.78E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110464220032526) differ by less than 4E-4 (2.771292368253242e-06)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313505300145301) differ by less than 4E-4 (3.910739154733278e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510685471570221] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7520s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5628s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1886s for    90112 events => throughput is 4.78E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 0.211 [0.21095842133012335] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0846s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9123s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1719s for    81920 events => throughput is 4.77E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685471570221) differ by less than 4E-4 (5.043963013928732e-08)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842133012335) differ by less than 4E-4 (3.528729641821826e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.699105e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.731547e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.760510e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.814682e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110477321990667] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7659s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7644s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.33E+07 events/s
+ [XSECTION] Cross section = 0.2031 [0.20313508590887899] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8504s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8464s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.64E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cuda (0.27110477321990667) differ by less than 4E-4 (2.2880132283242816e-06)
+OK! xsec from fortran (0.20313504505737126) and cuda (0.20313508590887899) differ by less than 4E-4 (2.011051698502797e-07)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510689318513457] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0120s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.0043s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0068s for    90112 events => throughput is 1.33E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
+ [XSECTION] Cross section = 0.211 [0.21095846337765808] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.3852s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.3752s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0091s for    81920 events => throughput is 8.98E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689318513457) differ by less than 4E-4 (1.2839907048700638e-07)
+OK! xsec from fortran (0.21095842877427595) and cuda (0.21095846337765808) differ by less than 4E-4 (1.640293887383848e-07)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.646157e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.088649e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.427430e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.406235e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.058841e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.833815e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.444069e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.147443e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.942937e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.839780e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.493709e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.591152e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.477775e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.570167e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.312163e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.189550e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
index cac7bc2d3a..d3b173c725 100644
--- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu
 
+
 make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cppnone
 
 make USEBUILDDIR=1 BACKEND=cppsse4
-
-make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_01:07:54
+DATE: 2024-09-18_13:56:48
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
- [UNWEIGHT] Wrote 404 events (found 1817 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4748s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.4019s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0728s for     8192 events => throughput is 1.12E+05 events/s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0
+ [UNWEIGHT] Wrote 506 events (found 1943 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.5250s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4531s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0719s for     8192 events => throughput is 1.14E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4102s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3379s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0723s for     8192 events => throughput is 1.13E+05 events/s
+ [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4812s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4090s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0722s for     8192 events => throughput is 1.13E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.3880s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5917s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.7964s for    90112 events => throughput is 1.13E+05 events/s
+ [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6071s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.8898s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.7173s for    81920 events => throughput is 1.14E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539350666329] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4162s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3378s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0776s for     8192 events => throughput is 1.06E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 0.2031 [0.20313504495344831] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4867s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4094s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0766s for     8192 events => throughput is 1.07E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539350666329) differ by less than 2E-4 (2.2020940626532592e-11)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344831) differ by less than 2E-4 (5.115954326839756e-10)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686560103207] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4355s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5736s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.8611s for    90112 events => throughput is 1.05E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6750s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9006s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.7737s for    81920 events => throughput is 1.06E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560103207) differ by less than 2E-4 (1.646582870051816e-10)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.055368e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.066197e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.053968e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.081547e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539350666335] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3844s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3408s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0429s for     8192 events => throughput is 1.91E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.2031 [0.20313504495344833] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4594s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4139s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0450s for     8192 events => throughput is 1.82E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539350666335) differ by less than 2E-4 (2.2020718581927667e-11)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344833) differ by less than 2E-4 (5.115952106393706e-10)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686560103204] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0360s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5625s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4729s for    90112 events => throughput is 1.91E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.3287s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9003s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4278s for    81920 events => throughput is 1.91E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560103204) differ by less than 2E-4 (1.6465806496057667e-10)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.911607e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.915335e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.920193e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.909065e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3629s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3374s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0249s for     8192 events => throughput is 3.29E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4419s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4163s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0251s for     8192 events => throughput is 3.26E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330887440) differ by less than 2E-4 (7.515855715567454e-10)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8345s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5624s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2714s for    90112 events => throughput is 3.32E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.1482s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9012s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2465s for    81920 events => throughput is 3.32E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686557693198) differ by less than 2E-4 (5.262057456434377e-11)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.345273e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.313762e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.258323e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.370922e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3609s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3379s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0225s for     8192 events => throughput is 3.64E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4314s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4086s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0223s for     8192 events => throughput is 3.67E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330887440) differ by less than 2E-4 (7.515855715567454e-10)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8072s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5644s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2423s for    90112 events => throughput is 3.72E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.1188s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9005s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2179s for    81920 events => throughput is 3.76E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686557693198) differ by less than 2E-4 (5.262057456434377e-11)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.708299e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.747505e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.793939e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.784395e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3730s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3362s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0361s for     8192 events => throughput is 2.27E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4497s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4136s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0355s for     8192 events => throughput is 2.31E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330887440) differ by less than 2E-4 (7.515855715567454e-10)
+OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    1.9604s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.5708s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3889s for    90112 events => throughput is 2.32E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.2638s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9140s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3492s for    81920 events => throughput is 2.35E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686557693198) differ by less than 2E-4 (5.262057456434377e-11)
+OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.276917e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.324769e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.303186e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.345529e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1
- [UNWEIGHT] Wrote 404 events (found 1228 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7690s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7671s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0008s for     8192 events => throughput is 1.08E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
+ [XSECTION] Cross section = 0.2031 [0.20313504512110778] fbridge_mode=1
+ [UNWEIGHT] Wrote 499 events (found 1502 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8545s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8502s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0032s for     8192 events => throughput is 2.59E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539343558537) differ by less than 2E-4 (2.8419910869104115e-10)
+OK! xsec from fortran (0.20313504505737126) and cuda (0.20313504512110778) differ by less than 2E-4 (3.1376434783680907e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/32
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1
- [UNWEIGHT] Wrote 1939 events (found 1944 events)
- [COUNTERS] PROGRAM TOTAL          :    2.0035s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.9941s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0082s for    90112 events => throughput is 1.10E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
+ [XSECTION] Cross section = 0.211 [0.21095842873460982] fbridge_mode=1
+ [UNWEIGHT] Wrote 2259 events (found 2264 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.3685s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.3565s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0108s for    81920 events => throughput is 7.58E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686553631395) differ by less than 2E-4 (1.3620671257541517e-10)
+OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842873460982) differ by less than 2E-4 (1.8802814860663375e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.605327e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.938068e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.982731e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.128844e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.566857e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.264635e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.554656e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.049713e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.578971e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.259722e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.820298e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.250826e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.584085e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.279255e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.787764e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.647946e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
index 599f2d92c2..fad5d1a64f 100644
--- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
@@ -1,19 +1,19 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 
-make USEBUILDDIR=1 BACKEND=cuda
 make USEBUILDDIR=1 BACKEND=cppnone
 
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
-
-make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
+make USEBUILDDIR=1 BACKEND=cpp512y
+
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:04:08
+DATE: 2024-09-18_17:32:01
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
- [UNWEIGHT] Wrote 3321 events (found 6423 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9413s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8936s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0477s for     8192 events => throughput is 1.72E+05 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0
+ [UNWEIGHT] Wrote 3371 events (found 6399 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.9760s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.9274s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0485s for     8192 events => throughput is 1.69E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4211s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3732s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0479s for     8192 events => throughput is 1.71E+05 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4561s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4081s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0480s for     8192 events => throughput is 1.71E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8422s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3150s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.5273s for    90112 events => throughput is 1.71E+05 events/s
+ [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0599s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5836s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4763s for    81920 events => throughput is 1.72E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4307s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3786s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0515s for     8192 events => throughput is 1.59E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.016 [2.0160081479755170] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4589s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4083s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0501s for     8192 events => throughput is 1.64E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256148) differ by less than 3E-14 (6.661338147750939e-16)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755170) differ by less than 3E-14 (6.661338147750939e-16)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8612s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2987s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5620s for    90112 events => throughput is 1.60E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0776s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5685s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5087s for    81920 events => throughput is 1.61E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.636065e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.656917e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.650747e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.653176e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3992s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3709s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0279s for     8192 events => throughput is 2.94E+05 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4352s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4071s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0277s for     8192 events => throughput is 2.96E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755183) differ by less than 3E-14 (0.0)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6028s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2911s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3113s for    90112 events => throughput is 2.89E+05 events/s
+ [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.8492s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5718s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2771s for    81920 events => throughput is 2.96E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.975115e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.911436e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.964788e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.988061e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3885s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3708s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0172s for     8192 events => throughput is 4.76E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4258s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4084s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0170s for     8192 events => throughput is 4.81E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256232) differ by less than 3E-14 (3.552713678800501e-15)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4811s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2942s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1864s for    90112 events => throughput is 4.83E+05 events/s
+ [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7395s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5731s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1660s for    81920 events => throughput is 4.93E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377489) differ by less than 3E-14 (3.885780586188048e-15)
+OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.599662e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.731125e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.637890e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.942521e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4631s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.4466s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0161s for     8192 events => throughput is 5.09E+05 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4230s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4072s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0154s for     8192 events => throughput is 5.31E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256232) differ by less than 3E-14 (3.552713678800501e-15)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4597s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2887s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1706s for    90112 events => throughput is 5.28E+05 events/s
+ [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7429s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5869s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1556s for    81920 events => throughput is 5.26E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377489) differ by less than 3E-14 (3.885780586188048e-15)
+OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.357156e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.403364e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.487847e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.439647e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3943s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3702s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0235s for     8192 events => throughput is 3.48E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.016 [2.0160081479755179] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4355s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4114s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0237s for     8192 events => throughput is 3.46E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755179) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5502s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2928s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2568s for    90112 events => throughput is 3.51E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.8196s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5847s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2345s for    81920 events => throughput is 3.49E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.535346e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.483959e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.536932e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.537527e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8001s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7986s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.22E+07 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081479755192] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8564s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8526s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.65E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955499256165) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081479755192) differ by less than 3E-14 (4.440892098500626e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7266s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7188s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0070s for    90112 events => throughput is 1.29E+07 events/s
+ [XSECTION] Cross section = 2.034 [2.0336713375865294] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0272s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0173s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0091s for    81920 events => throughput is 9.01E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895240377573) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713375865294) differ by less than 3E-14 (4.440892098500626e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.985964e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.829708e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.226056e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.382767e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.193230e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.774663e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.709507e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.124992e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.177681e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.755835e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.042958e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.430950e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.170672e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.756916e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.763585e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.513302e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
index d9149a96bc..4984f73b96 100644
--- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
@@ -1,22 +1,22 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 
-make USEBUILDDIR=1 BACKEND=cuda
-
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
-make USEBUILDDIR=1 BACKEND=cppsse4
 
+make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
+
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:04:35
+DATE: 2024-09-18_17:32:29
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
- [UNWEIGHT] Wrote 3321 events (found 6423 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9322s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8838s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0484s for     8192 events => throughput is 1.69E+05 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0
+ [UNWEIGHT] Wrote 3371 events (found 6399 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.9597s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.9125s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0473s for     8192 events => throughput is 1.73E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4198s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3714s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0484s for     8192 events => throughput is 1.69E+05 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4554s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4080s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0474s for     8192 events => throughput is 1.73E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8381s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3115s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.5266s for    90112 events => throughput is 1.71E+05 events/s
+ [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0424s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5675s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4749s for    81920 events => throughput is 1.72E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,43 +125,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162897355760356] fbridge_mode=1
- [UNWEIGHT] Wrote 1620 events (found 1625 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4164s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3687s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0472s for     8192 events => throughput is 1.73E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 2.016 [2.0160406825242951] fbridge_mode=1
+ [UNWEIGHT] Wrote 1653 events (found 1658 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4547s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4074s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0470s for     8192 events => throughput is 1.74E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162897355760356) differ by less than 4E-4 (2.8836792208553064e-06)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160406825242951) differ by less than 4E-4 (1.6138103811513815e-05)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ!
 diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20
-6206,6207c6206,6207
-<          21   -1    0    0  502  503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01  0.59936081260E+01  0.00000000000E+00 0. -1.
-<           5    1    1    2  501    0  0.45273385612E+02 -0.31131305296E+02  0.47763304676E+03  0.48080583916E+03  0.47000000000E+01 0.  1.
----
->          21   -1    0    0  502  503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01  0.59936081260E+01  0.00000000000E+00 0.  1.
->           5    1    1    2  501    0  0.45273385612E+02 -0.31131305296E+02  0.47763304676E+03  0.48080583916E+03  0.47000000000E+01 0. -1.
-8306,8307c8306,8307
-<          21   -1    0    0  502  503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997239E+02  0.23857997239E+02  0.00000000000E+00 0.  1.
-<           5    1    1    2  501    0 -0.34843521722E+02  0.35239303629E+02  0.13219496682E+02  0.51504607743E+02  0.47000000000E+01 0. -1.
----
->          21   -1    0    0  502  503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997239E+02  0.23857997239E+02  0.00000000000E+00 0. -1.
->           5    1    1    2  501    0 -0.34843521722E+02  0.35239303629E+02  0.13219496682E+02  0.51504607743E+02  0.47000000000E+01 0.  1.
-9606,9619d9605
-< 4 1 1E-03 0.1250139E+03 0.7546771E-02 0.1235066E+00
-<          21   -1    0    0  503  502  0.00000000000E+00  0.00000000000E+00  0.94948250004E+03  0.94948250004E+03  0.00000000000E+00 0.  1.
-<          21   -1    0    0  502  503 -0.00000000000E+00 -0.00000000000E+00 -0.41149990002E+01  0.41149990002E+01  0.00000000000E+00 0. -1.
-<           5    1    1    2  501    0 -0.96459450317E+01 -0.34409175043E+02  0.83136584965E+02  0.90613560477E+02  0.47000000000E+01 0. -1.
-<          -5    1    1    2    0  501  0.96459450317E+01  0.34409175043E+02  0.86223091608E+03  0.86298393857E+03  0.47000000000E+01 0.  1.
+7562,7575d7561
+< 4 1 1E-03 0.1250010E+03 0.7546771E-02 0.1235066E+00
+<          21   -1    0    0  503  502  0.00000000000E+00  0.00000000000E+00  0.71320499473E+02  0.71320499473E+02  0.00000000000E+00 0.  1.
+<          21   -1    0    0  502  503 -0.00000000000E+00 -0.00000000000E+00 -0.54771239790E+02  0.54771239790E+02  0.00000000000E+00 0.  1.
+<           5    1    1    2  501    0  0.50303102232E+02  0.36190119942E+02  0.14973002893E+02  0.63925016162E+02  0.47000000000E+01 0. -1.
+<          -5    1    1    2    0  501 -0.50303102232E+02 -0.36190119942E+02  0.15762567893E+01  0.62166723101E+02  0.47000000000E+01 0. -1.
 < <mgrwt>
-< <rscale>  0 0.12501391E+03</rscale>
+< <rscale>  0 0.12500099E+03</rscale>
+< <asrwt>0</asrwt>
+< <pdfrwt beam="1">  1       21 0.10972385E-01 0.12500099E+03</pdfrwt>
+< <pdfrwt beam="2">  1       21 0.84263445E-02 0.12500099E+03</pdfrwt>
+< <totfact> 0.73891524E+06</totfact>
+< </mgrwt>
+< </event>
+< <event>
diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
index 93c59c43c2..e45c8953e0 100644
--- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
@@ -1,28 +1,28 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
+
 make USEBUILDDIR=1 BACKEND=cppnone
+
 make USEBUILDDIR=1 BACKEND=cppsse4
 
 make USEBUILDDIR=1 BACKEND=cppavx2
-
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:04:41
+DATE: 2024-09-18_17:32:35
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
- [UNWEIGHT] Wrote 3321 events (found 6423 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9477s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8996s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0481s for     8192 events => throughput is 1.70E+05 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0
+ [UNWEIGHT] Wrote 3371 events (found 6399 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.9574s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.9096s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0478s for     8192 events => throughput is 1.71E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4203s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3730s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0473s for     8192 events => throughput is 1.73E+05 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4551s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4079s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0472s for     8192 events => throughput is 1.74E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8386s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3127s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.5259s for    90112 events => throughput is 1.71E+05 events/s
+ [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0377s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5636s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4741s for    81920 events => throughput is 1.73E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -126,22 +126,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4234s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3719s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0509s for     8192 events => throughput is 1.61E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.016 [2.0160081964453331] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4555s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4040s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0511s for     8192 events => throughput is 1.60E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955975930954) differ by less than 2E-4 (2.3641117063988304e-08)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453331) differ by less than 2E-4 (2.4042469792817656e-08)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -161,22 +162,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.8536s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2929s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5602s for    90112 events => throughput is 1.61E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.034 [2.0336713843200420] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0631s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5575s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5052s for    81920 events => throughput is 1.62E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895706383660) differ by less than 2E-4 (2.2804427679545825e-08)
+OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713843200420) differ by less than 2E-4 (2.2979875113904313e-08)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -184,15 +186,16 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.543543e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.544125e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.554670e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.558149e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -208,22 +211,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3974s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3691s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0279s for     8192 events => throughput is 2.94E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.016 [2.0160081964453336] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4342s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4061s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0277s for     8192 events => throughput is 2.95E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955975930958) differ by less than 2E-4 (2.364111728603291e-08)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453336) differ by less than 2E-4 (2.404247001486226e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -243,22 +247,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6024s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2932s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3087s for    90112 events => throughput is 2.92E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.034 [2.0336713843200425] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.8495s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5723s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2768s for    81920 events => throughput is 2.96E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895706383669) differ by less than 2E-4 (2.2804428123635034e-08)
+OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713843200425) differ by less than 2E-4 (2.2979875335948918e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -266,15 +271,16 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.657975e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.837690e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.837138e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.886878e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -290,22 +296,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3887s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3707s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0175s for     8192 events => throughput is 4.69E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4268s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4090s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0174s for     8192 events => throughput is 4.71E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953696393) differ by less than 2E-4 (2.2538374055969257e-08)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -325,22 +332,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4978s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3038s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1936s for    90112 events => throughput is 4.65E+05 events/s
+ [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7266s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5577s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1685s for    81920 events => throughput is 4.86E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701245432) differ by less than 2E-4 (2.255298392483951e-08)
+OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -348,15 +356,16 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.667384e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.779100e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.720430e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.788127e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -372,22 +381,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3879s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3718s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0157s for     8192 events => throughput is 5.23E+05 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4240s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4080s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0156s for     8192 events => throughput is 5.24E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953696393) differ by less than 2E-4 (2.2538374055969257e-08)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -407,22 +417,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4676s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2932s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1740s for    90112 events => throughput is 5.18E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7202s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5626s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1572s for    81920 events => throughput is 5.21E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701245432) differ by less than 2E-4 (2.255298392483951e-08)
+OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -430,15 +441,16 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.145643e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.210846e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.181518e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.166322e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -454,22 +466,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3947s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3700s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0242s for     8192 events => throughput is 3.38E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.016 [2.0160081962970020] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4371s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4123s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0244s for     8192 events => throughput is 3.35E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08)
+OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962970020) differ by less than 2E-4 (2.3968893092529697e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -489,22 +502,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5616s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2951s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2660s for    90112 events => throughput is 3.39E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 2.034 [2.0336713836598515] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.8082s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5683s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2395s for    81920 events => throughput is 3.42E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08)
+OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598515) differ by less than 2E-4 (2.2655245235370103e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -512,15 +526,16 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.165146e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.192275e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.207013e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.206869e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -535,22 +550,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1
- [UNWEIGHT] Wrote 1617 events (found 1622 events)
- [COUNTERS] PROGRAM TOTAL          :    0.7985s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7970s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.25E+07 events/s
+ [XSECTION] Cross section = 2.016 [2.0160081483021330] fbridge_mode=1
+ [UNWEIGHT] Wrote 1652 events (found 1657 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8528s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8489s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.64E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955503257827) differ by less than 2E-4 (1.9846613241725208e-10)
+OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081483021330) differ by less than 2E-4 (1.6201062713605552e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -569,22 +585,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1
- [UNWEIGHT] Wrote 1818 events (found 1823 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7427s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7348s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0071s for    90112 events => throughput is 1.26E+07 events/s
+ [XSECTION] Cross section = 2.034 [2.0336713380111449] fbridge_mode=1
+ [UNWEIGHT] Wrote 1707 events (found 1712 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0087s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9990s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0090s for    81920 events => throughput is 9.13E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895242795732) differ by less than 2E-4 (1.183348974365117e-10)
+OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713380111449) differ by less than 2E-4 (2.0879298290310544e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -593,42 +610,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.059399e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.939022e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.161680e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.244768e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.190897e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.749489e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.747163e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.094535e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.176819e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.768020e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.038339e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.372316e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.199275e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.761406e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.748317e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.486762e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
index 68ef447e1c..cf925a09c6 100644
--- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
@@ -1,10 +1,10 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 
+
 make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
 
-
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:07:41
+DATE: 2024-09-18_17:35:48
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 1 events (found 1041 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6583s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3435s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.3148s for     8192 events => throughput is 3.54E+03 events/s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 1 events (found 902 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6643s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3661s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.2982s for     8192 events => throughput is 3.56E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6583s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3400s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.3182s for     8192 events => throughput is 3.53E+03 events/s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6579s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3588s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.2991s for     8192 events => throughput is 3.56E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   27.3144s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8056s
- [COUNTERS] Fortran MEs      ( 1 ) :   25.5088s for    90112 events => throughput is 3.53E+03 events/s
+ [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :   25.0583s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0507s
+ [COUNTERS] Fortran MEs      ( 1 ) :   23.0076s for    81920 events => throughput is 3.56E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.8421s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3410s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4957s for     8192 events => throughput is 3.28E+03 events/s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.8350s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3611s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4687s for     8192 events => throughput is 3.32E+03 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0053s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   29.2164s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8016s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   27.4095s for    90112 events => throughput is 3.29E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0053s
+ [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :   26.8636s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0489s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   24.8095s for    81920 events => throughput is 3.30E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0052s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438187E-007) differ by less than 3E-14 (5.551115123125783e-16)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898148E-007) differ by less than 3E-14 (0.0)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.417499e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.460402e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.436809e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.457448e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6539s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3409s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.3101s for     8192 events => throughput is 6.25E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728610E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6570s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3616s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2927s for     8192 events => throughput is 6.34E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0027s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084412E-007) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728610E-007) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   16.3084s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8060s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   14.4993s for    90112 events => throughput is 6.21E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
+ [XSECTION] Cross section = 7.654e-07 [7.6542926582898191E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :   15.0457s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0488s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   12.9941s for    81920 events => throughput is 6.30E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0028s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438230E-007) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898191E-007) differ by less than 3E-14 (4.440892098500626e-16)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.430064e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.568599e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.462632e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.542585e+03                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9243s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3403s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5823s for     8192 events => throughput is 1.41E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0018s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.9401s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3632s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5754s for     8192 events => throughput is 1.42E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    8.2383s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8071s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    6.4295s for    90112 events => throughput is 1.40E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
+ [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    7.7938s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0478s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.7444s for    81920 events => throughput is 1.43E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.441882e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.465958e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.451357e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.451297e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8589s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3412s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5161s for     8192 events => throughput is 1.59E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8791s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3648s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5129s for     8192 events => throughput is 1.60E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0014s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    7.4852s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8014s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    5.6822s for    90112 events => throughput is 1.59E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
+ [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    7.1685s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0408s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.1262s for    81920 events => throughput is 1.60E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0014s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.632945e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.660633e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.644376e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.663487e+04                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    1.0252s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3429s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6803s for     8192 events => throughput is 1.20E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0020s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.0357s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3623s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6717s for     8192 events => throughput is 1.22E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0018s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    9.2840s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8079s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    7.4740s for    90112 events => throughput is 1.21E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0021s
+ [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    8.7351s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0383s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    6.6950s for    81920 events => throughput is 1.22E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.223468e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.242378e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.221461e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.242909e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8083s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7715s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0172s for     8192 events => throughput is 4.76E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0195s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728578E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8480s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8085s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0198s for     8192 events => throughput is 4.13E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0197s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610362728578E-007) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4407s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.2322s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1889s for    90112 events => throughput is 4.77E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0195s
+ [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6736s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.4774s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1765s for    81920 events => throughput is 4.64E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0197s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.831207e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.229187e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.210893e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.527847e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.189493e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.819324e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.418574e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.226919e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.138850e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.844216e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.408292e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.225190e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.156050e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.847840e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.757548e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.681732e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
index 161dd39f0e..d625debf72 100644
--- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
@@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s
 
 
 make USEBUILDDIR=1 BACKEND=cuda
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:10:07
+DATE: 2024-09-18_17:38:08
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 1 events (found 1041 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6746s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3402s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.3343s for     8192 events => throughput is 3.51E+03 events/s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 1 events (found 902 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6507s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3559s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.2948s for     8192 events => throughput is 3.57E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6608s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3466s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.3142s for     8192 events => throughput is 3.54E+03 events/s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6503s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3594s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.2909s for     8192 events => throughput is 3.58E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   27.2560s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7984s
- [COUNTERS] Fortran MEs      ( 1 ) :   25.4576s for    90112 events => throughput is 3.54E+03 events/s
+ [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :   25.0293s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0436s
+ [COUNTERS] Fortran MEs      ( 1 ) :   22.9857s for    81920 events => throughput is 3.56E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896784952157763E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.8058s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3400s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4606s for     8192 events => throughput is 3.33E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0052s
+ [XSECTION] Cross section = 7.638e-07 [7.6381686438954397E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.7985s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3626s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4310s for     8192 events => throughput is 3.37E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0049s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896784952157763E-007) differ by less than 4E-4 (1.088869447052332e-06)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686438954397E-007) differ by less than 4E-4 (9.960018576560259e-07)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668138450782073E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   28.8449s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8046s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   27.0351s for    90112 events => throughput is 3.33E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0052s
+ [XSECTION] Cross section = 7.654e-07 [7.6542978900095690E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :   26.3775s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0419s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   24.3307s for    81920 events => throughput is 3.37E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0048s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668138450782073E-007) differ by less than 4E-4 (7.160651642745819e-07)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542978900095690E-007) differ by less than 4E-4 (6.835014008110818e-07)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.458764e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.486852e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.452922e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.494086e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    1.0243s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3393s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6833s for     8192 events => throughput is 1.20E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
+ [XSECTION] Cross section = 7.638e-07 [7.6381671483253128E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.0412s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3621s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6777s for     8192 events => throughput is 1.21E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896766542858863E-007) differ by less than 4E-4 (8.584556829838164e-07)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381671483253128E-007) differ by less than 4E-4 (8.001994753481512e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    9.3335s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8075s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    7.5244s for    90112 events => throughput is 1.20E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
+ [XSECTION] Cross section = 7.654e-07 [7.6542962735029303E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    8.8470s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0545s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    6.7910s for    81920 events => throughput is 1.21E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668121906848987E-007) differ by less than 4E-4 (5.002787206720427e-07)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542962735029303E-007) differ by less than 4E-4 (4.7231184874263477e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.227692e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.236746e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.229550e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.228135e+04                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6384s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3405s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2969s for     8192 events => throughput is 2.76E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
+ [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6595s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3662s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2926s for     8192 events => throughput is 2.80E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896764408326359E-007) differ by less than 4E-4 (8.31739528805997e-07)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    5.0435s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8033s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    3.2391s for    90112 events => throughput is 2.78E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0011s
+ [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    5.0070s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0356s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.9705s for    81920 events => throughput is 2.76E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007) differ by less than 4E-4 (5.380134884269694e-07)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.846216e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.852598e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.859531e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.877017e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6113s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3429s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2674s for     8192 events => throughput is 3.06E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
+ [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6241s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3601s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2632s for     8192 events => throughput is 3.11E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896764408326359E-007) differ by less than 4E-4 (8.31739528805997e-07)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    4.7543s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8053s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.9479s for    90112 events => throughput is 3.06E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
+ [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    4.6833s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0329s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.6497s for    81920 events => throughput is 3.09E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007) differ by less than 4E-4 (5.380134884269694e-07)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.159800e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.188563e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.158171e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.189121e+04                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6860s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3420s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3427s for     8192 events => throughput is 2.39E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0013s
+ [XSECTION] Cross section = 7.638e-07 [7.6381686320975603E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6974s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3592s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.3373s for     8192 events => throughput is 2.43E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896778056937195E-007) differ by less than 4E-4 (1.0025677505964836e-06)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686320975603E-007) differ by less than 4E-4 (9.944572607611946e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    5.5700s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8019s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    3.7669s for    90112 events => throughput is 2.39E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0012s
+ [XSECTION] Cross section = 7.654e-07 [7.6543004237976207E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    5.4012s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0331s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    3.3672s for    81920 events => throughput is 2.43E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0010s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668139178203571E-007) differ by less than 4E-4 (7.255530953820255e-07)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6543004237976207E-007) differ by less than 4E-4 (1.014529774634454e-06)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.384494e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.455496e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.358993e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.453904e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896802503195373E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8066s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7726s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0171s for     8192 events => throughput is 4.80E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0170s
+ [XSECTION] Cross section = 7.638e-07 [7.6381711031958629E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8395s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8026s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0198s for     8192 events => throughput is 4.15E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0171s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896802503195373E-007) differ by less than 4E-4 (1.3085410737190273e-06)
+OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381711031958629E-007) differ by less than 4E-4 (1.3179773188376487e-06)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668190930428073E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4214s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.2342s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1702s for    90112 events => throughput is 5.29E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0170s
+ [XSECTION] Cross section = 7.654e-07 [7.6543026921346333E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6550s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.4784s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1594s for    81920 events => throughput is 5.14E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0171s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668190930428073E-007) differ by less than 4E-4 (1.400569635601201e-06)
+OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6543026921346333E-007) differ by less than 4E-4 (1.3108781262705094e-06)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.887616e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.221158e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.141211e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.431078e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.326572e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.300814e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.344235e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.323922e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.327212e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.295837e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.345274e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.322906e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.322182e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.292673e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.678752e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.656202e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
index 21c70ea563..e6874f3a32 100644
--- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
@@ -1,11 +1,11 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 
 
-
 make USEBUILDDIR=1 BACKEND=cuda
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
+
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:12:08
+DATE: 2024-09-18_17:40:03
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 1 events (found 1041 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6771s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3415s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.3355s for     8192 events => throughput is 3.51E+03 events/s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 1 events (found 902 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6497s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3549s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.2948s for     8192 events => throughput is 3.57E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.6590s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3414s
- [COUNTERS] Fortran MEs      ( 1 ) :    2.3176s for     8192 events => throughput is 3.53E+03 events/s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6518s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3572s
+ [COUNTERS] Fortran MEs      ( 1 ) :    2.2945s for     8192 events => throughput is 3.57E+03 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   27.2570s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8020s
- [COUNTERS] Fortran MEs      ( 1 ) :   25.4551s for    90112 events => throughput is 3.54E+03 events/s
+ [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :   25.0481s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0523s
+ [COUNTERS] Fortran MEs      ( 1 ) :   22.9958s for    81920 events => throughput is 3.56E+03 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    2.9720s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3489s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    2.6176s for     8192 events => throughput is 3.13E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0054s
+ [XSECTION] Cross section = 7.638e-07 [7.6381608764955655E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.8581s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3622s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    2.4908s for     8192 events => throughput is 3.29E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0051s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696375074447E-007) differ by less than 2E-4 (1.9775660775600556e-08)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608764955655E-007) differ by less than 2E-4 (2.0918293319738268e-08)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   29.4504s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8086s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   27.6363s for    90112 events => throughput is 3.26E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0054s
+ [XSECTION] Cross section = 7.654e-07 [7.6542925018181681E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :   27.0498s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0486s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   24.9962s for    81920 events => throughput is 3.28E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0051s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081976882373E-007) differ by less than 2E-4 (2.0537305522871918e-08)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925018181681E-007) differ by less than 2E-4 (2.044233915476923e-08)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.407468e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.436817e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.398569e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.435512e+03                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    1.6359s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3461s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2869s for     8192 events => throughput is 6.37E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0029s
+ [XSECTION] Cross section = 7.638e-07 [7.6381608686521600E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6335s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3586s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    1.2723s for     8192 events => throughput is 6.44E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0026s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696285825688E-007) differ by less than 2E-4 (2.089271267102788e-08)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608686521600E-007) differ by less than 2E-4 (2.1945164241365944e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :   16.2099s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8349s
- [COUNTERS] CudaCpp MEs      ( 2 ) :   14.3720s for    90112 events => throughput is 6.27E+03 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0030s
+ [XSECTION] Cross section = 7.654e-07 [7.6542924921991264E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :   14.8859s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0643s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :   12.8189s for    81920 events => throughput is 6.39E+03 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0027s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081890954375E-007) differ by less than 2E-4 (2.1658084770059816e-08)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542924921991264E-007) differ by less than 2E-4 (2.1699025132271288e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.609107e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.796385e+03                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.653433e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.784638e+03                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.9279s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3431s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5830s for     8192 events => throughput is 1.41E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
+ [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.9347s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3638s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5694s for     8192 events => throughput is 1.44E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0015s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    8.3102s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8480s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    6.4604s for    90112 events => throughput is 1.39E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0018s
+ [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    7.7335s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0485s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    5.6834s for    81920 events => throughput is 1.44E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.427331e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.478402e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.434667e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.467295e+04                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8698s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3480s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5202s for     8192 events => throughput is 1.57E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
+ [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8664s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3632s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5018s for     8192 events => throughput is 1.63E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0014s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    7.5428s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8367s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    5.7043s for    90112 events => throughput is 1.58E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0017s
+ [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    7.0416s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0429s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    4.9973s for    81920 events => throughput is 1.64E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0014s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.635041e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.693290e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.637970e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.684165e+04                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    1.0618s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3493s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.7103s for     8192 events => throughput is 1.15E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0022s
+ [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.0424s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.3606s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.6800s for     8192 events => throughput is 1.20E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0019s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08)
+OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    9.5434s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.8253s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    7.7159s for    90112 events => throughput is 1.17E+04 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0021s
+ [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    8.8421s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0543s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    6.7861s for    81920 events => throughput is 1.21E+04 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0016s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08)
+OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.208934e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.218603e+04                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.204603e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.233808e+04                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 215 events (found 963 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8109s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7740s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0173s for     8192 events => throughput is 4.74E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0196s
+ [XSECTION] Cross section = 7.638e-07 [7.6381610372590318E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 230 events (found 851 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8393s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7998s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0197s for     8192 events => throughput is 4.15E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0198s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697918297644E-007) differ by less than 2E-4 (4.6042958334879813e-10)
+OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610372590318E-007) differ by less than 2E-4 (1.2911138824733825e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 64/64
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1
- [UNWEIGHT] Wrote 1700 events (found 1705 events)
- [COUNTERS] PROGRAM TOTAL          :    2.4522s
- [COUNTERS] Fortran Overhead ( 0 ) :    2.2422s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1904s for    90112 events => throughput is 4.73E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0196s
+ [XSECTION] Cross section = 7.654e-07 [7.6542926581386226E-007] fbridge_mode=1
+ [UNWEIGHT] Wrote 1679 events (found 1684 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.6799s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.4834s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1767s for    81920 events => throughput is 4.64E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0198s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551547592E-007) differ by less than 2E-4 (1.4264145420384011e-12)
+OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926581386226E-007) differ by less than 2E-4 (1.9752643964920935e-11)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.781444e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.207155e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.182913e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.529302e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.157874e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.824963e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.386488e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.207520e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.107983e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.824989e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.377808e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.199605e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.110849e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.829686e+05                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.750476e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.672241e+05                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
index d53f8d5c95..a3ffe665a4 100644
--- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
@@ -1,15 +1,14 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
+
 make USEBUILDDIR=1 BACKEND=cppsse4
 
-make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
+make USEBUILDDIR=1 BACKEND=cppavx2
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
@@ -17,6 +16,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:06:26
+DATE: 2024-09-18_17:34:28
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
- [UNWEIGHT] Wrote 1767 events (found 4306 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6644s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6558s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0087s for     8192 events => throughput is 9.43E+05 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0
+ [UNWEIGHT] Wrote 1732 events (found 4297 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6936s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6849s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0087s for     8192 events => throughput is 9.41E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3909s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3823s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0087s for     8192 events => throughput is 9.46E+05 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4210s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4125s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0085s for     8192 events => throughput is 9.64E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4368s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3432s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0936s for    90112 events => throughput is 9.63E+05 events/s
+ [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6464s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5624s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0840s for    81920 events => throughput is 9.75E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3913s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3824s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0084s for     8192 events => throughput is 9.70E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4282s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4193s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0085s for     8192 events => throughput is 9.61E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156027201869291) differ by less than 3E-14 (3.3306690738754696e-16)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426120) differ by less than 3E-14 (0.0)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4316s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3380s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0932s for    90112 events => throughput is 9.67E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6471s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5643s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0824s for    81920 events => throughput is 9.94E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098556244384418) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.991231e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.967649e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.007459e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.004982e+06                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3890s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3840s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0046s for     8192 events => throughput is 1.79E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4170s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4121s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0046s for     8192 events => throughput is 1.80E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156027201869291) differ by less than 3E-14 (3.3306690738754696e-16)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426120) differ by less than 3E-14 (0.0)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3910s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3397s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0509s for    90112 events => throughput is 1.77E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6254s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5798s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0452s for    81920 events => throughput is 1.81E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098556244384418) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.890551e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.903119e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.963594e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.966510e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3870s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3837s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.93E+06 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4203s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4171s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0029s for     8192 events => throughput is 2.84E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156027201869291) differ by less than 3E-14 (3.3306690738754696e-16)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3706s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3379s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0322s for    90112 events => throughput is 2.79E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.5995s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5705s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0287s for    81920 events => throughput is 2.85E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098556244384418) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.034126e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.156830e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.221092e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.269475e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3880s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3848s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.93E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4170s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4140s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.00E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156027201869291) differ by less than 3E-14 (3.3306690738754696e-16)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3742s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3423s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0315s for    90112 events => throughput is 2.86E+06 events/s
+ [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.5963s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5689s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0271s for    81920 events => throughput is 3.03E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098556244384418) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.240649e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.272274e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.467487e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.518220e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3854s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3817s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0032s for     8192 events => throughput is 2.59E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4193s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4158s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.64E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156027201869291) differ by less than 3E-14 (3.3306690738754696e-16)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3744s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3392s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0347s for    90112 events => throughput is 2.60E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6175s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5862s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0309s for    81920 events => throughput is 2.65E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098556244384418) differ by less than 3E-14 (4.440892098500626e-16)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.788936e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.808476e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.074319e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.104718e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869280] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8156s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8144s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.39E+07 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426109] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8589s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8551s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0032s for     8192 events => throughput is 2.55E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cuda (0.31156027201869280) differ by less than 3E-14 (6.661338147750939e-16)
+OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452343426109) differ by less than 3E-14 (3.3306690738754696e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098556244384401] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7702s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7641s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0055s for    90112 events => throughput is 1.65E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0146s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0062s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0079s for    81920 events => throughput is 1.04E+07 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cuda (0.31098556244384401) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.977913e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.114496e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.013766e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.411767e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.090365e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.418677e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.620997e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.644247e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.121235e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.424249e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.991647e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.758689e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.084242e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.384808e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.324810e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.177573e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
index 4417c97e0c..6af3b55835 100644
--- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
@@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s
 make USEBUILDDIR=1 BACKEND=cuda
 
 
-make USEBUILDDIR=1 BACKEND=cppnone
-
 make USEBUILDDIR=1 BACKEND=cppsse4
+
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:06:51
+DATE: 2024-09-18_17:34:55
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
- [UNWEIGHT] Wrote 1767 events (found 4306 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6549s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6462s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0087s for     8192 events => throughput is 9.43E+05 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0
+ [UNWEIGHT] Wrote 1732 events (found 4297 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6834s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6749s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0085s for     8192 events => throughput is 9.63E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3959s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3870s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0088s for     8192 events => throughput is 9.30E+05 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4230s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4146s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0084s for     8192 events => throughput is 9.72E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4361s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3410s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0951s for    90112 events => throughput is 9.47E+05 events/s
+ [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6556s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5716s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0840s for    81920 events => throughput is 9.75E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156021439979276] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3909s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3824s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0082s for     8192 events => throughput is 9.98E+05 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449446496609361] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4261s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4173s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0085s for     8192 events => throughput is 9.65E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156021439979276) differ by less than 4E-4 (1.8493660913776466e-07)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446496609361) differ by less than 4E-4 (1.9201714018812766e-07)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098550550786874] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4299s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3366s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0930s for    90112 events => throughput is 9.69E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.3075 [0.30747305007079218] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6758s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5913s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0842s for    81920 events => throughput is 9.73E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098550550786874) differ by less than 4E-4 (1.8308237492714596e-07)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305007079218) differ by less than 4E-4 (1.858740792393121e-07)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.408795e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.013895e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.021260e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.014072e+06                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156021343761686] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3872s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3842s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.01E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.3045 [0.30449446369440458] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4170s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4141s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.96E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156021343761686) differ by less than 4E-4 (1.8802485879376718e-07)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446369440458) differ by less than 4E-4 (1.961935339744869e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098550488814170] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3638s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3333s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0302s for    90112 events => throughput is 2.98E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.3075 [0.30747304961041555] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6056s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5782s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0272s for    81920 events => throughput is 3.01E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098550488814170) differ by less than 4E-4 (1.8507515886501125e-07)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747304961041555) differ by less than 4E-4 (1.8737136997515336e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.285260e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.210079e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.345950e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.272367e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3861s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3838s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0019s for     8192 events => throughput is 4.22E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4185s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4165s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0018s for     8192 events => throughput is 4.66E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156021516056748) differ by less than 4E-4 (1.8249478717091705e-07)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3595s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3386s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0206s for    90112 events => throughput is 4.38E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6070s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5879s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0188s for    81920 events => throughput is 4.35E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098550596898289) differ by less than 4E-4 (1.815996238940798e-07)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.963399e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.874017e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.167457e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.300612e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3858s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3837s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0018s for     8192 events => throughput is 4.45E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4142s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4122s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0018s for     8192 events => throughput is 4.54E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156021516056748) differ by less than 4E-4 (1.8249478717091705e-07)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3645s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3439s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0203s for    90112 events => throughput is 4.44E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.5999s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5818s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0179s for    81920 events => throughput is 4.59E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098550596898289) differ by less than 4E-4 (1.815996238940798e-07)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.054609e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.302533e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.604962e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.611044e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156021917867366] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3906s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3880s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0022s for     8192 events => throughput is 3.78E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3045 [0.30449447031649013] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4176s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4150s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0023s for     8192 events => throughput is 3.63E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156021917867366) differ by less than 4E-4 (1.695980652582918e-07)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449447031649013) differ by less than 4E-4 (1.744457354124762e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098551029624061] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3833s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3593s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0236s for    90112 events => throughput is 3.82E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3075 [0.30747305508949557] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6057s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5839s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0214s for    81920 events => throughput is 3.82E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098551029624061) differ by less than 4E-4 (1.6768496602370675e-07)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305508949557) differ by less than 4E-4 (1.6955166515231213e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.396678e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.359914e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.568316e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.606033e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156022290359153] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8179s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8164s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.47E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
+ [XSECTION] Cross section = 0.3045 [0.30449447352014630] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8553s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8518s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0030s for     8192 events => throughput is 2.72E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cuda (0.31156022290359153) differ by less than 4E-4 (1.576423758198331e-07)
+OK! xsec from fortran (0.30449452343426120) and cuda (0.30449447352014630) differ by less than 4E-4 (1.639245078566276e-07)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098551341908548] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7841s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7780s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0052s for    90112 events => throughput is 1.74E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0009s
+ [XSECTION] Cross section = 0.3075 [0.30747305761315818] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0187s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0105s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0077s for    81920 events => throughput is 1.06E+07 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cuda (0.31098551341908548) differ by less than 4E-4 (1.5764319793998283e-07)
+OK! xsec from fortran (0.30747310722207288) and cuda (0.30747305761315818) differ by less than 4E-4 (1.6134391445099538e-07)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.109830e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.132925e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.241375e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.463748e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.618711e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.452376e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.567861e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.074682e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.588931e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.432547e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.665083e+09                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.996097e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.844966e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.096714e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.699487e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.649902e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
index daf5b737bc..d3c2ed78ae 100644
--- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
@@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s
 make USEBUILDDIR=1 BACKEND=cuda
 
 
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
 
-make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppavx2
 
 make USEBUILDDIR=1 BACKEND=cpp512y
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:07:15
+DATE: 2024-09-18_17:35:21
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
- [UNWEIGHT] Wrote 1767 events (found 4306 events)
- [COUNTERS] PROGRAM TOTAL          :    0.6646s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.6556s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0090s for     8192 events => throughput is 9.06E+05 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0
+ [UNWEIGHT] Wrote 1732 events (found 4297 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.6928s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.6841s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0087s for     8192 events => throughput is 9.39E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3932s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3846s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0086s for     8192 events => throughput is 9.53E+05 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4220s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4135s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0085s for     8192 events => throughput is 9.58E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4482s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3542s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0940s for    90112 events => throughput is 9.59E+05 events/s
+ [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6593s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5746s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0847s for    81920 events => throughput is 9.68E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3955s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3864s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0086s for     8192 events => throughput is 9.48E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4261s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4172s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0085s for     8192 events => throughput is 9.60E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156028014369008) differ by less than 2E-4 (2.6078411874408403e-08)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4328s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3379s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0944s for    90112 events => throughput is 9.54E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6599s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5747s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0848s for    81920 events => throughput is 9.66E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098557069460298) differ by less than 2E-4 (2.6531003172181045e-08)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.887098e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.812593e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.512638e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.833045e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3896s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3847s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0045s for     8192 events => throughput is 1.80E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4216s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4168s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0045s for     8192 events => throughput is 1.83E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156028014369008) differ by less than 2E-4 (2.6078411874408403e-08)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3924s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3423s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0497s for    90112 events => throughput is 1.81E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6345s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5889s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0453s for    81920 events => throughput is 1.81E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098557069460298) differ by less than 2E-4 (2.6531003172181045e-08)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.970137e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.934567e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.025977e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.973267e+06                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3875s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3843s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.96E+06 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4198s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4167s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.92E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156028097537258) differ by less than 2E-4 (2.8747823010988327e-08)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3703s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3390s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0309s for    90112 events => throughput is 2.92E+06 events/s
+ [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6044s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5763s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0278s for    81920 events => throughput is 2.95E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098557141632605) differ by less than 2E-4 (2.8851763866910574e-08)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.209026e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.181731e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.431270e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.488610e+06                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3876s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3845s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0027s for     8192 events => throughput is 3.04E+06 events/s
+ [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4215s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4184s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0028s for     8192 events => throughput is 2.97E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156028097537258) differ by less than 2E-4 (2.8747823010988327e-08)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3745s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3437s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0304s for    90112 events => throughput is 2.96E+06 events/s
+ [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6145s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5872s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0269s for    81920 events => throughput is 3.04E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098557141632605) differ by less than 2E-4 (2.8851763866910574e-08)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.278252e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.215852e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.611379e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.572337e+06                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3891s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3855s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.64E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4203s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4167s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0032s for     8192 events => throughput is 2.60E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cpp (0.31156028097537258) differ by less than 2E-4 (2.8747823010988327e-08)
+OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3783s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.3437s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0341s for    90112 events => throughput is 2.64E+06 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6110s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5806s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0301s for    81920 events => throughput is 2.73E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cpp (0.31098557141632605) differ by less than 2E-4 (2.8851763866910574e-08)
+OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.908043e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.871798e+06                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.094537e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.069203e+06                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.3116 [0.31156027194560187] fbridge_mode=1
- [UNWEIGHT] Wrote 1636 events (found 1641 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8165s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8154s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.40E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
+ [XSECTION] Cross section = 0.3045 [0.30449452360186230] fbridge_mode=1
+ [UNWEIGHT] Wrote 1612 events (found 1617 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8557s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8521s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.66E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31156027201869302) and cuda (0.31156027194560187) differ by less than 2E-4 (2.345971195083507e-10)
+OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452360186230) differ by less than 2E-4 (5.504239286580059e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 4/4
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 2
  [XSECTION] ChannelId = 3
- [XSECTION] Cross section = 0.311 [0.31098556243340819] fbridge_mode=1
- [UNWEIGHT] Wrote 1828 events (found 1833 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7739s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7677s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0056s for    90112 events => throughput is 1.61E+07 events/s
+ [XSECTION] Cross section = 0.3075 [0.30747310720557364] fbridge_mode=1
+ [UNWEIGHT] Wrote 1631 events (found 1636 events)
+ [COUNTERS] PROGRAM TOTAL          :    2.0158s
+ [COUNTERS] Fortran Overhead ( 0 ) :    2.0075s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0078s for    81920 events => throughput is 1.05E+07 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (0.31098556244384407) and cuda (0.31098556243340819) differ by less than 2E-4 (3.3557379097715057e-11)
+OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310720557364) differ by less than 2E-4 (5.366074251611508e-11)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.052309e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.203370e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.981616e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.488049e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.109375e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.465715e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.503882e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.776763e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.117951e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.434433e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.883236e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.877036e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.131877e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.445366e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.291537e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.175182e+08                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
index c4c5ee1ec5..e14403d083 100644
--- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
@@ -1,15 +1,15 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 
-
 make USEBUILDDIR=1 BACKEND=cuda
-make USEBUILDDIR=1 BACKEND=cppnone
 
 
+make USEBUILDDIR=1 BACKEND=cppnone
+
 make USEBUILDDIR=1 BACKEND=cppsse4
 make USEBUILDDIR=1 BACKEND=cppavx2
 
-make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:05:08
+DATE: 2024-09-18_17:33:04
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
- [UNWEIGHT] Wrote 2620 events (found 5403 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8194s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7770s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0423s for     8192 events => throughput is 1.94E+05 events/s
+ [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0
+ [UNWEIGHT] Wrote 2625 events (found 5368 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8478s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8053s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0425s for     8192 events => throughput is 1.93E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4148s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3722s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0426s for     8192 events => throughput is 1.92E+05 events/s
+ [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4470s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4043s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0427s for     8192 events => throughput is 1.92E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7361s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2691s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4670s for    90112 events => throughput is 1.93E+05 events/s
+ [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9546s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5320s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4226s for    81920 events => throughput is 1.94E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419863] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4179s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3727s
+ [XSECTION] Cross section = 44.64 [44.641911695846964] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4564s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4114s
  [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0447s for     8192 events => throughput is 1.83E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419863) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846964) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7703s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2738s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4960s for    90112 events => throughput is 1.82E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 44.47 [44.473264592444664] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9966s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5483s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4479s for    81920 events => throughput is 1.83E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256471) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444664) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.858086e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.854967e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.858737e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.848854e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3976s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3721s
+ [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4357s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4103s
  [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0251s for     8192 events => throughput is 3.26E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846957) differ by less than 3E-14 (0.0)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5481s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2696s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2781s for    90112 events => throughput is 3.24E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.8082s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5526s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2552s for    81920 events => throughput is 3.21E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256471) differ by less than 3E-14 (2.220446049250313e-16)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.271451e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.238674e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.291022e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.323356e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3883s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3723s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0156s for     8192 events => throughput is 5.25E+05 events/s
+ [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4251s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4091s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0157s for     8192 events => throughput is 5.22E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4494s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2754s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1736s for    90112 events => throughput is 5.19E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6875s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5321s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1550s for    81920 events => throughput is 5.28E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.204785e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.205465e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.255139e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.341963e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3930s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3775s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0150s for     8192 events => throughput is 5.47E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4231s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4089s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0139s for     8192 events => throughput is 5.90E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4285s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2693s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1588s for    90112 events => throughput is 5.67E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6853s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5431s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1419s for    81920 events => throughput is 5.77E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.679020e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.704465e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.812224e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.807181e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3962s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3733s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0224s for     8192 events => throughput is 3.66E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4336s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4101s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0231s for     8192 events => throughput is 3.55E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5262s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2749s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2508s for    90112 events => throughput is 3.59E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7690s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5437s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2250s for    81920 events => throughput is 3.64E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.564809e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.577527e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.622774e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.629127e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8045s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8031s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0007s for     8192 events => throughput is 1.25E+07 events/s
+ [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8492s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8454s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.63E+06 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cuda (44.598860065419849) differ by less than 3E-14 (1.1102230246251565e-16)
+OK! xsec from fortran (44.641911695846957) and cuda (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7059s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.6982s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0068s for    90112 events => throughput is 1.32E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9952s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9855s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0089s for    81920 events => throughput is 9.15E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cuda (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16)
+OK! xsec from fortran (44.473264592444671) and cuda (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.039338e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.949285e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.623593e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.317105e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.266679e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.829932e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.078171e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.548750e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.260477e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.793745e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.153595e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.913836e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.275336e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.818636e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.083603e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.654381e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
index 8dec5eb758..a972218890 100644
--- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
@@ -1,22 +1,22 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 
-make USEBUILDDIR=1 BACKEND=cuda
 
+make USEBUILDDIR=1 BACKEND=cuda
 
 make USEBUILDDIR=1 BACKEND=cppnone
+
 make USEBUILDDIR=1 BACKEND=cppsse4
 
 make USEBUILDDIR=1 BACKEND=cppavx2
-
-make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:05:34
+DATE: 2024-09-18_17:33:33
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
- [UNWEIGHT] Wrote 2620 events (found 5403 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8122s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7694s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0428s for     8192 events => throughput is 1.91E+05 events/s
+ [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0
+ [UNWEIGHT] Wrote 2625 events (found 5368 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8346s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7928s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0418s for     8192 events => throughput is 1.96E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4148s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3721s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0427s for     8192 events => throughput is 1.92E+05 events/s
+ [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4474s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4056s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0418s for     8192 events => throughput is 1.96E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7463s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2808s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4655s for    90112 events => throughput is 1.94E+05 events/s
+ [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9534s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5332s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4202s for    81920 events => throughput is 1.95E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598853620719339] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4160s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3732s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0424s for     8192 events => throughput is 1.93E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.64 [44.641906072918047] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4536s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4103s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0430s for     8192 events => throughput is 1.91E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598853620719339) differ by less than 4E-4 (1.4450370500185272e-07)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641906072918047) differ by less than 4E-4 (1.2595627507661078e-07)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577522280119403] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7344s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2681s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4659s for    90112 events => throughput is 1.93E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.47 [44.473258789404959] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9508s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5325s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4180s for    81920 events => throughput is 1.96E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577522280119403) differ by less than 4E-4 (3.567127371262302e-08)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473258789404959) differ by less than 4E-4 (1.3048378089131063e-07)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.951237e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.983462e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.969815e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.977072e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598849697851406] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3892s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3716s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0172s for     8192 events => throughput is 4.75E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 44.64 [44.641902189470080] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4272s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4098s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0172s for     8192 events => throughput is 4.77E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598849697851406) differ by less than 4E-4 (2.3246263325393812e-07)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641902189470080) differ by less than 4E-4 (2.1294735186305758e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577518590213366] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4598s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2700s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1895s for    90112 events => throughput is 4.76E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 44.47 [44.473255074265531] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7014s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5300s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1712s for    81920 events => throughput is 4.79E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577518590213366) differ by less than 4E-4 (1.1844630731783212e-07)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473255074265531) differ by less than 4E-4 (2.1402024852346102e-07)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.698064e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.686204e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 4.736294e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.729824e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3829s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3733s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0093s for     8192 events => throughput is 8.82E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4171s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4080s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0089s for     8192 events => throughput is 9.21E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598850036412124) differ by less than 4E-4 (2.2487139172966408e-07)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3681s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2678s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0999s for    90112 events => throughput is 9.02E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6247s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5346s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0899s for    81920 events => throughput is 9.11E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577518612400254) differ by less than 4E-4 (1.1794859255953583e-07)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.141420e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.731978e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.161888e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.141403e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3833s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3744s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0087s for     8192 events => throughput is 9.46E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4189s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4099s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0088s for     8192 events => throughput is 9.27E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598850036412124) differ by less than 4E-4 (2.2487139172966408e-07)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.3696s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2744s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0950s for    90112 events => throughput is 9.49E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
+ [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6246s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5387s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0857s for    81920 events => throughput is 9.56E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0002s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577518612400254) differ by less than 4E-4 (1.1794859255953583e-07)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.541329e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.618487e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 9.771909e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.810592e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598854350242270] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3864s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3739s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0121s for     8192 events => throughput is 6.77E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.64 [44.641906399820272] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4241s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4113s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0125s for     8192 events => throughput is 6.53E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598854350242270) differ by less than 4E-4 (1.2814627048385319e-07)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641906399820272) differ by less than 4E-4 (1.1863351012664225e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577522751628507] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4212s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2851s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1357s for    90112 events => throughput is 6.64E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.47 [44.473258854390501] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6706s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5504s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1199s for    81920 events => throughput is 6.83E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577522751628507) differ by less than 4E-4 (2.5093990219104967e-08)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473258854390501) differ by less than 4E-4 (1.2902255375202287e-07)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.751639e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.869027e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 6.922683e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.899392e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598870301426373] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8028s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8015s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.44E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 44.64 [44.641910992291372] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8527s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8491s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0031s for     8192 events => throughput is 2.67E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cuda (44.598870301426373) differ by less than 4E-4 (2.2951273881410827e-07)
+OK! xsec from fortran (44.641911695846957) and cuda (44.641910992291372) differ by less than 4E-4 (1.575997887748315e-08)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577527268256027] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7031s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.6965s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0059s for    90112 events => throughput is 1.52E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
+ [XSECTION] Cross section = 44.47 [44.473262664842089] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9894s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9808s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0080s for    81920 events => throughput is 1.03E+07 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0006s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cuda (44.577527268256027) differ by less than 4E-4 (7.622674558227516e-08)
+OK! xsec from fortran (44.473264592444671) and cuda (44.473262664842089) differ by less than 4E-4 (4.334295222729878e-08)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.981782e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.991468e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.227555e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.344514e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.915612e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.881682e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.420308e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.350971e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.890432e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.826185e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.551435e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.350870e+08                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.474710e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.507679e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 7.460630e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.018982e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
index 426277ef12..f3cbf0c54f 100644
--- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
@@ -1,22 +1,22 @@
 Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 
-
 make USEBUILDDIR=1 BACKEND=cuda
 
-make USEBUILDDIR=1 BACKEND=cppnone
 
+make USEBUILDDIR=1 BACKEND=cppnone
 make USEBUILDDIR=1 BACKEND=cppsse4
-make USEBUILDDIR=1 BACKEND=cppavx2
 
+
+make USEBUILDDIR=1 BACKEND=cppavx2
 make USEBUILDDIR=1 BACKEND=cpp512y
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
 make USEBUILDDIR=1 BACKEND=cpp512z
 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/
 
 OMP_NUM_THREADS=
 
-DATE: 2024-08-30_05:06:00
+DATE: 2024-09-18_17:34:00
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
@@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
- [UNWEIGHT] Wrote 2620 events (found 5403 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8205s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.7782s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0423s for     8192 events => throughput is 1.94E+05 events/s
+ [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0
+ [UNWEIGHT] Wrote 2625 events (found 5368 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8326s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.7908s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0418s for     8192 events => throughput is 1.96E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) ***
 --------------------
@@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4194s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3765s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.0429s for     8192 events => throughput is 1.91E+05 events/s
+ [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4514s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4081s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.0433s for     8192 events => throughput is 1.89E+05 events/s
 
 *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) ***
 --------------------
@@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7506s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2806s
- [COUNTERS] Fortran MEs      ( 1 ) :    0.4701s for    90112 events => throughput is 1.92E+05 events/s
+ [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9646s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5417s
+ [COUNTERS] Fortran MEs      ( 1 ) :    0.4230s for    81920 events => throughput is 1.94E+05 events/s
 
 *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4273s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3798s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0470s for     8192 events => throughput is 1.74E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4612s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4163s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0445s for     8192 events => throughput is 1.84E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598861353577519) differ by less than 2E-4 (2.888319694527297e-08)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08)
 
 *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577525144126803] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7703s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2698s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.5000s for    90112 events => throughput is 1.80E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9868s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5362s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.4502s for    81920 events => throughput is 1.82E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577525144126803) differ by less than 2E-4 (2.8576516486467085e-08)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08)
 
 *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.827224e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.844411e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.858385e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.856447e+05                 )  sec^-1
 
 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.4001s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3740s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0256s for     8192 events => throughput is 3.20E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4369s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4119s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0247s for     8192 events => throughput is 3.31E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598861353577519) differ by less than 2E-4 (2.888319694527297e-08)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577525144126810] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5448s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2672s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2772s for    90112 events => throughput is 3.25E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7808s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5352s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2452s for    81920 events => throughput is 3.34E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577525144126810) differ by less than 2E-4 (2.857651670851169e-08)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08)
 
 *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.299374e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.286947e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.333877e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.353817e+05                 )  sec^-1
 
 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3941s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3783s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0154s for     8192 events => throughput is 5.30E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4259s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4101s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0155s for     8192 events => throughput is 5.30E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598861344883289) differ by less than 2E-4 (2.868825421664667e-08)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4525s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2782s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1739s for    90112 events => throughput is 5.18E+05 events/s
+ [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6932s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5410s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1518s for    81920 events => throughput is 5.39E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577525178109212) differ by less than 2E-4 (2.9338838025694258e-08)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08)
 
 *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.280897e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.355089e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.361948e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.376897e+05                 )  sec^-1
 
 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3861s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3714s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0142s for     8192 events => throughput is 5.77E+05 events/s
+ [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4277s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4132s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0141s for     8192 events => throughput is 5.80E+05 events/s
  [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598861344883289) differ by less than 2E-4 (2.868825421664667e-08)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.4270s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2702s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1563s for    90112 events => throughput is 5.77E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
+ [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.6947s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5520s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.1424s for    81920 events => throughput is 5.75E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0003s
 
 *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577525178109212) differ by less than 2E-4 (2.9338838025694258e-08)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08)
 
 *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.837527e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.878311e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.870753e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.966522e+05                 )  sec^-1
 
 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) ***
 --------------------
@@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.3960s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.3736s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0219s for     8192 events => throughput is 3.74E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.4311s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.4086s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0221s for     8192 events => throughput is 3.70E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cpp (44.598861344883289) differ by less than 2E-4 (2.868825421664667e-08)
+OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.5174s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.2715s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2453s for    90112 events => throughput is 3.67E+05 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0005s
+ [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.7576s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.5385s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.2187s for    81920 events => throughput is 3.74E+05 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0004s
 
 *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cpp (44.577525178109212) differ by less than 2E-4 (2.9338838025694258e-08)
+OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08)
 
 *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical
 *** EXECUTE CHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.623653e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.691521e+05                 )  sec^-1
 
 *** EXECUTE CHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.691171e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.773666e+05                 )  sec^-1
 
 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) ***
 --------------------
@@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp'
+DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.6 [44.598860056955807] fbridge_mode=1
- [UNWEIGHT] Wrote 1603 events (found 1608 events)
- [COUNTERS] PROGRAM TOTAL          :    0.8038s
- [COUNTERS] Fortran Overhead ( 0 ) :    0.8024s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0006s for     8192 events => throughput is 1.30E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 44.64 [44.641911674225568] fbridge_mode=1
+ [UNWEIGHT] Wrote 1617 events (found 1622 events)
+ [COUNTERS] PROGRAM TOTAL          :    0.8481s
+ [COUNTERS] Fortran Overhead ( 0 ) :    0.8441s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0032s for     8192 events => throughput is 2.54E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.598860065419856) and cuda (44.598860056955807) differ by less than 2E-4 (1.8978174587402918e-10)
+OK! xsec from fortran (44.641911695846957) and cuda (44.641911674225568) differ by less than 2E-4 (4.843293543999039e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192
 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!)
 --------------------
 Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp'
+DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 }
  [OPENMPTH] omp_get_max_threads/nproc = 1/4
  [NGOODHEL] ngoodhel/ncomb = 16/16
  [XSECTION] VECSIZE_USED = 8192
  [XSECTION] MultiChannel = TRUE
  [XSECTION] Configuration = 1
  [XSECTION] ChannelId = 1
- [XSECTION] Cross section = 44.58 [44.577523872560512] fbridge_mode=1
- [UNWEIGHT] Wrote 1743 events (found 1748 events)
- [COUNTERS] PROGRAM TOTAL          :    1.7081s
- [COUNTERS] Fortran Overhead ( 0 ) :    1.7005s
- [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0068s for    90112 events => throughput is 1.33E+07 events/s
- [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0008s
+ [XSECTION] Cross section = 44.47 [44.473264587763374] fbridge_mode=1
+ [UNWEIGHT] Wrote 1622 events (found 1627 events)
+ [COUNTERS] PROGRAM TOTAL          :    1.9812s
+ [COUNTERS] Fortran Overhead ( 0 ) :    1.9714s
+ [COUNTERS] CudaCpp MEs      ( 2 ) :    0.0090s for    81920 events => throughput is 9.08E+06 events/s
+ [COUNTERS] CudaCpp HEL      ( 3 ) :    0.0007s
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec ***
 
-OK! xsec from fortran (44.577523870256456) and cuda (44.577523872560512) differ by less than 2E-4 (5.168643291142416e-11)
+OK! xsec from fortran (44.473264592444671) and cuda (44.473264587763374) differ by less than 2E-4 (1.0526091109852587e-10)
 
 *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) ***
 
@@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical
 *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.992048e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.958191e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(8192) -p 256 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 5.630705e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.401140e+06                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.263161e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.815576e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX) -p 16384 32 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.062051e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.499893e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.277099e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.820308e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 1.139998e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.845220e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 3.271500e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.813891e+07                 )  sec^-1
 
 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 ***
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
-EvtsPerSec[MECalcOnly] (3a) = ( 2.946602e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.729165e+07                 )  sec^-1
 
 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
index d8009f6b3e..95eb3e309d 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 15s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_22:45:26
+DATE: 2024-09-18_12:08:39
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.803809e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.717028e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.166742e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.586175e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.543752e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.774580e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.901125 sec
+TOTAL       :     0.722428 sec
 INFO: No Floating Point Exceptions have been reported
-     2,657,428,250      cycles                           #    2.848 GHz                    
-     4,095,613,701      instructions                     #    1.54  insn per cycle         
-       1.213739460 seconds time elapsed
+     2,618,484,542      cycles                           #    2.848 GHz                    
+     4,056,431,697      instructions                     #    1.55  insn per cycle         
+       1.017935073 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.028409e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.198012e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.198012e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.036688e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.208211e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.208211e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.530303 sec
+TOTAL       :     6.477862 sec
 INFO: No Floating Point Exceptions have been reported
-    19,075,896,141      cycles                           #    2.919 GHz                    
-    46,074,311,860      instructions                     #    2.42  insn per cycle         
-       6.535837772 seconds time elapsed
+    19,055,513,200      cycles                           #    2.940 GHz                    
+    46,088,548,361      instructions                     #    2.42  insn per cycle         
+       6.483409710 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.570712e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.047478e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.047478e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.577995e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.056243e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.056243e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.420809 sec
+TOTAL       :     4.401175 sec
 INFO: No Floating Point Exceptions have been reported
-    12,935,671,986      cycles                           #    2.923 GHz                    
-    31,611,096,814      instructions                     #    2.44  insn per cycle         
-       4.426216014 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2:    0) (512y:    0) (512z:    0)
+    12,945,161,675      cycles                           #    2.938 GHz                    
+    31,621,534,754      instructions                     #    2.44  insn per cycle         
+       4.406822784 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.982246e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.749049e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.749049e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.979178e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.760192e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.760192e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.584392 sec
+TOTAL       :     3.589167 sec
 INFO: No Floating Point Exceptions have been reported
-    10,005,997,843      cycles                           #    2.788 GHz                    
-    19,602,170,267      instructions                     #    1.96  insn per cycle         
-       3.589884459 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1946) (512y:    0) (512z:    0)
+    10,070,726,803      cycles                           #    2.802 GHz                    
+    19,587,544,877      instructions                     #    1.94  insn per cycle         
+       3.594697986 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1909) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.034742e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.851215e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.851215e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.973756e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.755912e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.755912e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.501989 sec
+TOTAL       :     3.601272 sec
 INFO: No Floating Point Exceptions have been reported
-     9,776,595,596      cycles                           #    2.788 GHz                    
-    19,251,276,525      instructions                     #    1.97  insn per cycle         
-       3.507337404 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1685) (512y:  178) (512z:    0)
+     9,893,708,282      cycles                           #    2.744 GHz                    
+    19,261,714,155      instructions                     #    1.95  insn per cycle         
+       3.606677205 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1647) (512y:  180) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.721753e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.269385e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.269385e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.684138e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.223088e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.223088e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.065497 sec
+TOTAL       :     4.152604 sec
 INFO: No Floating Point Exceptions have been reported
-     8,584,082,989      cycles                           #    2.109 GHz                    
-    15,723,059,479      instructions                     #    1.83  insn per cycle         
-       4.070951861 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  880) (512y:  156) (512z: 1257)
+     8,635,892,874      cycles                           #    2.077 GHz                    
+    15,755,316,929      instructions                     #    1.82  insn per cycle         
+       4.158382190 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  871) (512y:  156) (512z: 1258)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt
index 15c86e64f8..7e1127db04 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 13s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:27:07
+DATE: 2024-09-18_12:51:56
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,15 +53,16 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.624887e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.702462e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.702462e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.746451e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.921944e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.921944e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     2.264047 sec
+TOTAL       :     2.223700 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     7,265,794,550      cycles                           #    2.898 GHz                    
-    13,119,795,658      instructions                     #    1.81  insn per cycle         
-       2.566395516 seconds time elapsed
+     7,220,060,160      cycles                           #    2.915 GHz                    
+    13,018,391,047      instructions                     #    1.80  insn per cycle         
+       2.533250665 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -76,7 +73,10 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -95,20 +95,24 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.965625e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.154434e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.154434e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.004941e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.165788e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.165788e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.932007 sec
+TOTAL       :     6.898046 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    20,279,302,832      cycles                           #    2.923 GHz                    
-    46,300,903,612      instructions                     #    2.28  insn per cycle         
-       6.938668002 seconds time elapsed
+    20,322,657,427      cycles                           #    2.944 GHz                    
+    46,321,216,193      instructions                     #    2.28  insn per cycle         
+       6.904944789 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -125,20 +129,24 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.493239e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.913977e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.913977e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.496333e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.925589e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.925589e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.834619 sec
+TOTAL       :     4.836326 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    14,076,513,939      cycles                           #    2.908 GHz                    
-    32,453,787,450      instructions                     #    2.31  insn per cycle         
-       4.841215294 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2:    0) (512y:    0) (512z:    0)
+    14,226,515,518      cycles                           #    2.937 GHz                    
+    32,466,683,813      instructions                     #    2.28  insn per cycle         
+       4.843971134 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -155,20 +163,24 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.862502e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.532080e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.532080e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.825666e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.487837e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.487837e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.005847 sec
+TOTAL       :     4.083516 sec
 INFO: No Floating Point Exceptions have been reported
-    11,211,606,490      cycles                           #    2.795 GHz                    
-    20,962,455,249      instructions                     #    1.87  insn per cycle         
-       4.012539970 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1946) (512y:    0) (512z:    0)
+INFO: No Floating Point Exceptions have been reported
+    11,316,310,914      cycles                           #    2.767 GHz                    
+    20,951,601,246      instructions                     #    1.85  insn per cycle         
+       4.090897830 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1909) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -185,20 +197,24 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.896245e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.585584e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.585584e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.895357e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.603837e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.603837e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.947050 sec
+TOTAL       :     3.954670 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    11,037,536,290      cycles                           #    2.793 GHz                    
-    20,609,974,645      instructions                     #    1.87  insn per cycle         
-       3.953712362 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1685) (512y:  178) (512z:    0)
+    11,188,953,637      cycles                           #    2.824 GHz                    
+    20,622,311,623      instructions                     #    1.84  insn per cycle         
+       3.962452110 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1647) (512y:  180) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -215,20 +231,24 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.615205e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.087554e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.087554e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.623904e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.111036e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.111036e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.531367 sec
+TOTAL       :     4.511201 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     9,875,108,333      cycles                           #    2.177 GHz                    
-    16,869,911,181      instructions                     #    1.71  insn per cycle         
-       4.538028145 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  880) (512y:  156) (512z: 1257)
+     9,933,844,941      cycles                           #    2.199 GHz                    
+    16,904,875,780      instructions                     #    1.70  insn per cycle         
+       4.518707685 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  871) (512y:  156) (512z: 1258)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt
index 92ef4f6f2f..d91c4828d9 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 01s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:38:45
+DATE: 2024-09-18_13:04:11
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.808918e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.668095e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.116734e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.531377e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.591267e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.748328e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     1.369794 sec
+TOTAL       :     1.353084 sec
 INFO: No Floating Point Exceptions have been reported
-     4,608,699,021      cycles                           #    2.885 GHz                    
-     7,094,580,310      instructions                     #    1.54  insn per cycle         
-       1.655641768 seconds time elapsed
+     4,633,251,875      cycles                           #    2.904 GHz                    
+     7,212,974,866      instructions                     #    1.56  insn per cycle         
+       1.652016166 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.028121e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.196836e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.196836e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.029394e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.199449e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.199449e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     6.905360 sec
+TOTAL       :     6.897530 sec
 INFO: No Floating Point Exceptions have been reported
-    20,204,972,387      cycles                           #    2.924 GHz                    
-    46,176,989,685      instructions                     #    2.29  insn per cycle         
-       6.910743496 seconds time elapsed
+    20,162,123,319      cycles                           #    2.922 GHz                    
+    46,195,009,239      instructions                     #    2.29  insn per cycle         
+       6.903032860 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.559907e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.028662e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.028662e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.570184e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.049072e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.049072e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     4.821292 sec
+TOTAL       :     4.798502 sec
 INFO: No Floating Point Exceptions have been reported
-    14,003,649,408      cycles                           #    2.902 GHz                    
-    31,613,857,025      instructions                     #    2.26  insn per cycle         
-       4.826776329 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2:    0) (512y:    0) (512z:    0)
+    14,063,092,419      cycles                           #    2.928 GHz                    
+    31,626,728,543      instructions                     #    2.25  insn per cycle         
+       4.804471582 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.980860e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.738326e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.738326e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.970535e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.757812e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.757812e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     3.957963 sec
+TOTAL       :     3.978190 sec
 INFO: No Floating Point Exceptions have been reported
-    11,113,367,910      cycles                           #    2.805 GHz                    
-    19,502,073,857      instructions                     #    1.75  insn per cycle         
-       3.963320231 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1946) (512y:    0) (512z:    0)
+    11,201,119,802      cycles                           #    2.813 GHz                    
+    19,490,103,913      instructions                     #    1.74  insn per cycle         
+       3.984105389 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1909) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.009415e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.801431e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.801431e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.023963e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.847631e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.847631e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     3.927255 sec
+TOTAL       :     3.897854 sec
 INFO: No Floating Point Exceptions have been reported
-    10,925,795,299      cycles                           #    2.779 GHz                    
-    18,950,076,288      instructions                     #    1.73  insn per cycle         
-       3.932774858 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1685) (512y:  178) (512z:    0)
+    11,011,148,409      cycles                           #    2.821 GHz                    
+    18,950,488,449      instructions                     #    1.72  insn per cycle         
+       3.903822013 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1647) (512y:  180) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.705280e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.276454e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.276454e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.727909e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.291192e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.291192e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371879e-02 +- 3.270020e-06 )  GeV^0
-TOTAL       :     4.480076 sec
+TOTAL       :     4.434907 sec
 INFO: No Floating Point Exceptions have been reported
-     9,807,871,817      cycles                           #    2.187 GHz                    
-    15,426,367,312      instructions                     #    1.57  insn per cycle         
-       4.485479236 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  880) (512y:  156) (512z: 1257)
+     9,769,161,551      cycles                           #    2.200 GHz                    
+    15,456,644,765      instructions                     #    1.58  insn per cycle         
+       4.440874371 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  871) (512y:  156) (512z: 1258)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt
index 10b54c1ef0..bb28d7f936 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 00s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:35:59
+DATE: 2024-09-18_13:01:24
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.797696e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.680633e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.151980e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.590069e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.635013e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.812541e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.987290 sec
+TOTAL       :     0.989483 sec
 INFO: No Floating Point Exceptions have been reported
-     3,494,786,482      cycles                           #    2.873 GHz                    
-     6,971,888,639      instructions                     #    1.99  insn per cycle         
-       1.273208272 seconds time elapsed
+     3,546,265,877      cycles                           #    2.891 GHz                    
+     7,041,909,652      instructions                     #    1.99  insn per cycle         
+       1.283490715 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.029433e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.198441e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.198441e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.026273e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.198046e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.198046e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.521930 sec
+TOTAL       :     6.545718 sec
 INFO: No Floating Point Exceptions have been reported
-    19,054,645,226      cycles                           #    2.920 GHz                    
-    46,074,013,678      instructions                     #    2.42  insn per cycle         
-       6.527229224 seconds time elapsed
+    19,118,940,916      cycles                           #    2.919 GHz                    
+    46,090,671,775      instructions                     #    2.41  insn per cycle         
+       6.551190549 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.573928e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.046992e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.046992e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.571851e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.052236e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.052236e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.412728 sec
+TOTAL       :     4.417688 sec
 INFO: No Floating Point Exceptions have been reported
-    12,916,865,926      cycles                           #    2.924 GHz                    
-    31,611,168,331      instructions                     #    2.45  insn per cycle         
-       4.418232205 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2:    0) (512y:    0) (512z:    0)
+    12,968,655,547      cycles                           #    2.932 GHz                    
+    31,622,331,959      instructions                     #    2.44  insn per cycle         
+       4.423489151 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.976115e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.748052e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.748052e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.966543e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.746878e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.746878e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.594982 sec
+TOTAL       :     3.610850 sec
 INFO: No Floating Point Exceptions have been reported
-    10,006,568,639      cycles                           #    2.780 GHz                    
-    19,599,883,322      instructions                     #    1.96  insn per cycle         
-       3.600617857 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1946) (512y:    0) (512z:    0)
+    10,121,576,066      cycles                           #    2.799 GHz                    
+    19,587,082,892      instructions                     #    1.94  insn per cycle         
+       3.616728345 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1909) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.035014e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.847591e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.847591e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.021109e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.843929e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.843929e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.500601 sec
+TOTAL       :     3.520824 sec
 INFO: No Floating Point Exceptions have been reported
-     9,800,712,040      cycles                           #    2.796 GHz                    
-    19,261,863,073      instructions                     #    1.97  insn per cycle         
-       3.506017349 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1685) (512y:  178) (512z:    0)
+     9,901,674,637      cycles                           #    2.808 GHz                    
+    19,249,331,839      instructions                     #    1.94  insn per cycle         
+       3.526811564 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1647) (512y:  180) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.727394e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.281204e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.281204e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.723287e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.278399e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.278399e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.053717 sec
+TOTAL       :     4.062783 sec
 INFO: No Floating Point Exceptions have been reported
-     8,568,345,184      cycles                           #    2.111 GHz                    
-    15,722,985,461      instructions                     #    1.84  insn per cycle         
-       4.059101293 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  880) (512y:  156) (512z: 1257)
+     8,637,222,753      cycles                           #    2.123 GHz                    
+    15,755,461,061      instructions                     #    1.82  insn per cycle         
+       4.068672997 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  871) (512y:  156) (512z: 1258)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt
index fe5153c071..95f355ef67 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 01s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:33:12
+DATE: 2024-09-18_12:58:35
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -54,15 +50,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.065171e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.636973e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.050281e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.092066e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.598729e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.734259e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     1.887176 sec
+TOTAL       :     1.884241 sec
 INFO: No Floating Point Exceptions have been reported
-     6,145,978,060      cycles                           #    2.903 GHz                    
-    11,435,822,809      instructions                     #    1.86  insn per cycle         
-       2.173027806 seconds time elapsed
+     6,167,226,842      cycles                           #    2.911 GHz                    
+    11,436,463,316      instructions                     #    1.85  insn per cycle         
+       2.174841291 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
@@ -70,7 +66,10 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -88,20 +87,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.029479e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.199057e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.199057e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.035778e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.207383e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.207383e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.523129 sec
+TOTAL       :     6.487731 sec
 INFO: No Floating Point Exceptions have been reported
-    19,067,053,861      cycles                           #    2.921 GHz                    
-    46,077,405,536      instructions                     #    2.42  insn per cycle         
-       6.528437903 seconds time elapsed
+    19,058,569,596      cycles                           #    2.936 GHz                    
+    46,087,741,277      instructions                     #    2.42  insn per cycle         
+       6.493592711 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -117,20 +119,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.548471e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.011932e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.011932e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.566016e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.044387e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.044387e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.480294 sec
+TOTAL       :     4.436390 sec
 INFO: No Floating Point Exceptions have been reported
-    12,970,362,518      cycles                           #    2.896 GHz                    
-    31,615,405,279      instructions                     #    2.44  insn per cycle         
-       4.485805448 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2:    0) (512y:    0) (512z:    0)
+    12,971,922,098      cycles                           #    2.921 GHz                    
+    31,622,790,809      instructions                     #    2.44  insn per cycle         
+       4.442502369 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -146,20 +151,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.990807e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.763259e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.763259e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.978030e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.768932e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.768932e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.568074 sec
+TOTAL       :     3.592451 sec
 INFO: No Floating Point Exceptions have been reported
-     9,997,994,130      cycles                           #    2.799 GHz                    
-    19,601,663,150      instructions                     #    1.96  insn per cycle         
-       3.573387334 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1946) (512y:    0) (512z:    0)
+    10,115,574,971      cycles                           #    2.812 GHz                    
+    19,587,420,856      instructions                     #    1.94  insn per cycle         
+       3.598300355 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1909) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -175,20 +183,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.031513e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.838493e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.838493e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.014830e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.827477e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.827477e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.506716 sec
+TOTAL       :     3.531675 sec
 INFO: No Floating Point Exceptions have been reported
-     9,800,646,915      cycles                           #    2.791 GHz                    
-    19,248,975,591      instructions                     #    1.96  insn per cycle         
-       3.512205386 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1685) (512y:  178) (512z:    0)
+     9,897,196,547      cycles                           #    2.799 GHz                    
+    19,249,419,683      instructions                     #    1.94  insn per cycle         
+       3.537559003 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1647) (512y:  180) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +215,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.737151e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.291381e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.291381e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.720646e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.279247e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.279247e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.030896 sec
+TOTAL       :     4.067635 sec
 INFO: No Floating Point Exceptions have been reported
-     8,561,312,861      cycles                           #    2.122 GHz                    
-    15,725,090,197      instructions                     #    1.84  insn per cycle         
-       4.036222457 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  880) (512y:  156) (512z: 1257)
+     8,664,270,263      cycles                           #    2.127 GHz                    
+    15,755,691,110      instructions                     #    1.82  insn per cycle         
+       4.073643316 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  871) (512y:  156) (512z: 1258)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt
index bf5eae53fe..e73a9b015a 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 49s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_22:45:57
+DATE: 2024-09-18_12:09:10
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.814972e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.742109e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.212342e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.079594e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.670378e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.825463e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.677183 sec
+TOTAL       :     0.680250 sec
 INFO: No Floating Point Exceptions have been reported
-     2,613,090,444      cycles                           #    2.872 GHz                    
-     4,047,105,274      instructions                     #    1.55  insn per cycle         
-       0.969507477 seconds time elapsed
+     2,578,534,884      cycles                           #    2.821 GHz                    
+     4,030,538,684      instructions                     #    1.56  insn per cycle         
+       0.973967444 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.029099e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.198310e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.198310e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.022698e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.191211e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.191211e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.524250 sec
+TOTAL       :     6.567027 sec
 INFO: No Floating Point Exceptions have been reported
-    19,046,558,619      cycles                           #    2.917 GHz                    
-    46,035,154,416      instructions                     #    2.42  insn per cycle         
-       6.529531773 seconds time elapsed
+    19,075,762,627      cycles                           #    2.903 GHz                    
+    46,055,106,551      instructions                     #    2.41  insn per cycle         
+       6.572547698 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  452) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.564780e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.031977e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.031977e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.585711e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.070341e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.070341e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.434953 sec
+TOTAL       :     4.383967 sec
 INFO: No Floating Point Exceptions have been reported
-    12,896,963,599      cycles                           #    2.905 GHz                    
-    31,585,225,315      instructions                     #    2.45  insn per cycle         
-       4.440300371 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1650) (avx2:    0) (512y:    0) (512z:    0)
+    12,890,625,740      cycles                           #    2.937 GHz                    
+    31,557,909,117      instructions                     #    2.45  insn per cycle         
+       4.389588631 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1648) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.967682e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.721566e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.721566e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.969969e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.755961e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.755961e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.607220 sec
+TOTAL       :     3.604328 sec
 INFO: No Floating Point Exceptions have been reported
-     9,997,689,808      cycles                           #    2.768 GHz                    
-    19,580,598,841      instructions                     #    1.96  insn per cycle         
-       3.612721980 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1929) (512y:    0) (512z:    0)
+    10,100,174,359      cycles                           #    2.799 GHz                    
+    19,576,296,506      instructions                     #    1.94  insn per cycle         
+       3.609879791 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1894) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.015746e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.813424e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.813424e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.022206e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.841390e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.841390e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.532733 sec
+TOTAL       :     3.520245 sec
 INFO: No Floating Point Exceptions have been reported
-     9,811,967,265      cycles                           #    2.774 GHz                    
-    19,264,271,138      instructions                     #    1.96  insn per cycle         
-       3.538120513 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1670) (512y:  178) (512z:    0)
+     9,894,539,917      cycles                           #    2.807 GHz                    
+    19,271,397,768      instructions                     #    1.95  insn per cycle         
+       3.525910639 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1636) (512y:  178) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.763379e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.340075e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.340075e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.762660e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.347769e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.347769e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.978075 sec
+TOTAL       :     3.982657 sec
 INFO: No Floating Point Exceptions have been reported
-     8,415,239,556      cycles                           #    2.113 GHz                    
-    15,592,978,303      instructions                     #    1.85  insn per cycle         
-       3.983452996 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  866) (512y:  156) (512z: 1237)
+     8,470,289,841      cycles                           #    2.124 GHz                    
+    15,587,855,124      instructions                     #    1.84  insn per cycle         
+       3.988212621 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  833) (512y:  153) (512z: 1240)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt
index 21ee791ce0..8184b4eff2 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 39s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:17:37
+DATE: 2024-09-18_12:42:17
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.492787e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.621744e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.179910e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.357145e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.547980e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.727026e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.685451 sec
+TOTAL       :     0.689616 sec
 INFO: No Floating Point Exceptions have been reported
-     2,651,264,313      cycles                           #    2.879 GHz                    
-     4,174,948,747      instructions                     #    1.57  insn per cycle         
-       0.978087360 seconds time elapsed
+     2,681,392,745      cycles                           #    2.885 GHz                    
+     4,097,806,151      instructions                     #    1.53  insn per cycle         
+       0.986657014 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.600183e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.050071e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.050071e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.608983e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.060555e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.060555e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.342486 sec
+TOTAL       :     4.321503 sec
 INFO: No Floating Point Exceptions have been reported
-    12,690,680,268      cycles                           #    2.919 GHz                    
-    32,481,885,780      instructions                     #    2.56  insn per cycle         
-       4.348166228 seconds time elapsed
+    12,686,452,587      cycles                           #    2.933 GHz                    
+    32,573,246,433      instructions                     #    2.57  insn per cycle         
+       4.326967751 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  281) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.021131e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.876235e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.876235e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.001283e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.839506e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.839506e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.528970 sec
+TOTAL       :     3.560441 sec
 INFO: No Floating Point Exceptions have been reported
-    10,310,937,032      cycles                           #    2.918 GHz                    
-    24,600,387,185      instructions                     #    2.39  insn per cycle         
-       3.534503426 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2:    0) (512y:    0) (512z:    0)
+    10,462,099,873      cycles                           #    2.934 GHz                    
+    24,899,188,532      instructions                     #    2.38  insn per cycle         
+       3.566316228 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1246) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.215463e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.225023e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.225023e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.199006e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.213700e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.213700e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.252959 sec
+TOTAL       :     3.271279 sec
 INFO: No Floating Point Exceptions have been reported
-     9,082,671,735      cycles                           #    2.788 GHz                    
-    16,914,669,530      instructions                     #    1.86  insn per cycle         
-       3.258703580 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1608) (512y:    0) (512z:    0)
+     9,171,998,387      cycles                           #    2.800 GHz                    
+    16,835,147,245      instructions                     #    1.84  insn per cycle         
+       3.276861848 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1599) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.291847e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.370167e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.370167e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.270242e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.359980e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.359980e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.155252 sec
+TOTAL       :     3.184078 sec
 INFO: No Floating Point Exceptions have been reported
-     8,860,640,460      cycles                           #    2.804 GHz                    
-    16,337,302,677      instructions                     #    1.84  insn per cycle         
-       3.161006597 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1344) (512y:  139) (512z:    0)
+     8,899,793,398      cycles                           #    2.791 GHz                    
+    16,396,706,280      instructions                     #    1.84  insn per cycle         
+       3.189617083 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1355) (512y:  139) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.936422e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.656528e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.656528e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.962735e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.715557e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.715557e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.659819 sec
+TOTAL       :     3.614098 sec
 INFO: No Floating Point Exceptions have been reported
-     7,888,827,724      cycles                           #    2.154 GHz                    
-    14,564,894,802      instructions                     #    1.85  insn per cycle         
-       3.665214653 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  990) (512y:  158) (512z:  954)
+     7,891,427,724      cycles                           #    2.181 GHz                    
+    14,556,226,424      instructions                     #    1.84  insn per cycle         
+       3.619718707 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1003) (512y:  158) (512z:  946)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt
index 7a6406d0c4..a7c1b0753b 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 26s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:18:03
+DATE: 2024-09-18_12:42:43
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.493801e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.641626e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.214265e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.653794e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.579157e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.778336e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.687800 sec
+TOTAL       :     0.686013 sec
 INFO: No Floating Point Exceptions have been reported
-     2,691,894,772      cycles                           #    2.880 GHz                    
-     4,100,143,973      instructions                     #    1.52  insn per cycle         
-       0.992202496 seconds time elapsed
+     2,680,206,326      cycles                           #    2.872 GHz                    
+     4,167,068,379      instructions                     #    1.55  insn per cycle         
+       0.992521934 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.096482e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.950388e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.950388e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.093810e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.934858e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.934858e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.412152 sec
+TOTAL       :     3.416949 sec
 INFO: No Floating Point Exceptions have been reported
-     9,976,603,472      cycles                           #    2.921 GHz                    
-    25,417,126,470      instructions                     #    2.55  insn per cycle         
-       3.417418960 seconds time elapsed
+    10,012,195,167      cycles                           #    2.926 GHz                    
+    25,507,793,848      instructions                     #    2.55  insn per cycle         
+       3.422575217 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  236) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.373808e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.642763e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.642763e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.342172e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.581913e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.581913e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.066171 sec
+TOTAL       :     3.102371 sec
 INFO: No Floating Point Exceptions have been reported
-     8,977,198,886      cycles                           #    2.923 GHz                    
-    21,409,395,242      instructions                     #    2.38  insn per cycle         
-       3.072036826 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1100) (avx2:    0) (512y:    0) (512z:    0)
+     9,123,975,305      cycles                           #    2.936 GHz                    
+    21,542,843,128      instructions                     #    2.36  insn per cycle         
+       3.108003766 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1112) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.352048e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.507087e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.507087e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.389028e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.617798e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.617798e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.082500 sec
+TOTAL       :     3.042023 sec
 INFO: No Floating Point Exceptions have been reported
-     8,651,955,812      cycles                           #    2.802 GHz                    
-    15,864,616,657      instructions                     #    1.83  insn per cycle         
-       3.088272692 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1481) (512y:    0) (512z:    0)
+     8,587,076,543      cycles                           #    2.818 GHz                    
+    15,956,957,926      instructions                     #    1.86  insn per cycle         
+       3.047668407 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1497) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.417412e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.673255e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.673255e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.421436e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.692453e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.692453e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.013451 sec
+TOTAL       :     3.006782 sec
 INFO: No Floating Point Exceptions have been reported
-     8,445,458,225      cycles                           #    2.799 GHz                    
-    15,571,912,210      instructions                     #    1.84  insn per cycle         
-       3.018940474 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1256) (512y:  141) (512z:    0)
+     8,445,737,284      cycles                           #    2.805 GHz                    
+    15,563,019,384      instructions                     #    1.84  insn per cycle         
+       3.012659502 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1264) (512y:  141) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.055814e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.899489e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.899489e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.061400e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.904070e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.904070e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.470320 sec
+TOTAL       :     3.459956 sec
 INFO: No Floating Point Exceptions have been reported
-     7,569,596,039      cycles                           #    2.178 GHz                    
-    14,278,276,260      instructions                     #    1.89  insn per cycle         
-       3.476115796 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1019) (512y:  164) (512z:  876)
+     7,611,248,188      cycles                           #    2.197 GHz                    
+    14,286,576,836      instructions                     #    1.88  insn per cycle         
+       3.465475679 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1031) (512y:  164) (512z:  876)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
index 2b79257b9d..db1ecc021d 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 55s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_22:46:28
+DATE: 2024-09-18_12:09:40
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.371929e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.187596e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.156077e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.236538e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.678017e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.558515e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
-TOTAL       :     0.576278 sec
+TOTAL       :     0.582271 sec
 INFO: No Floating Point Exceptions have been reported
-     2,286,525,925      cycles                           #    2.859 GHz                    
-     3,613,378,662      instructions                     #    1.58  insn per cycle         
-       0.856826723 seconds time elapsed
+     2,326,541,752      cycles                           #    2.874 GHz                    
+     3,619,452,327      instructions                     #    1.56  insn per cycle         
+       0.866579999 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 109
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.074323e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.268108e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.268108e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.078772e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.274321e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.274321e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     6.228196 sec
+TOTAL       :     6.204384 sec
 INFO: No Floating Point Exceptions have been reported
-    18,255,285,954      cycles                           #    2.930 GHz                    
-    45,002,856,271      instructions                     #    2.47  insn per cycle         
-       6.233407509 seconds time elapsed
+    18,271,266,571      cycles                           #    2.943 GHz                    
+    45,007,026,058      instructions                     #    2.46  insn per cycle         
+       6.209806202 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  411) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.248332e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.427587e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.427587e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.258213e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.443370e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.443370e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.172190 sec
+TOTAL       :     3.161459 sec
 INFO: No Floating Point Exceptions have been reported
-     9,271,630,908      cycles                           #    2.919 GHz                    
-    22,288,587,741      instructions                     #    2.40  insn per cycle         
-       3.177416791 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2:    0) (512y:    0) (512z:    0)
+     9,301,142,039      cycles                           #    2.938 GHz                    
+    22,273,650,036      instructions                     #    2.39  insn per cycle         
+       3.166937253 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.412454e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.674219e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.674219e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.422291e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.701347e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.701347e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.979908 sec
+TOTAL       :     2.969313 sec
 INFO: No Floating Point Exceptions have been reported
-     8,365,515,955      cycles                           #    2.803 GHz                    
-    15,745,814,699      instructions                     #    1.88  insn per cycle         
-       2.985184245 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2595) (512y:    0) (512z:    0)
+     8,389,284,998      cycles                           #    2.822 GHz                    
+    15,752,357,337      instructions                     #    1.88  insn per cycle         
+       2.974718872 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2565) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.459370e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.777330e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.777330e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.405471e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.684326e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.684326e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.932621 sec
+TOTAL       :     2.990876 sec
 INFO: No Floating Point Exceptions have been reported
-     8,230,915,348      cycles                           #    2.804 GHz                    
-    15,599,118,118      instructions                     #    1.90  insn per cycle         
-       2.937899823 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2500) (512y:   12) (512z:    0)
+     8,285,038,888      cycles                           #    2.766 GHz                    
+    15,588,340,357      instructions                     #    1.88  insn per cycle         
+       2.996605246 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2462) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.441935e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.730572e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.730572e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.444926e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.740937e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.740937e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.949312 sec
+TOTAL       :     2.948513 sec
 INFO: No Floating Point Exceptions have been reported
-     6,640,894,201      cycles                           #    2.249 GHz                    
-    12,857,017,252      instructions                     #    1.94  insn per cycle         
-       2.954500755 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1728) (512y:   17) (512z: 1439)
+     6,657,028,546      cycles                           #    2.254 GHz                    
+    12,863,339,645      instructions                     #    1.93  insn per cycle         
+       2.954217512 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1698) (512y:   16) (512z: 1440)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt
index 5d45f34ad5..47dd15a77b 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 03s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:27:41
+DATE: 2024-09-18_12:52:30
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,26 +53,30 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.143990e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.691952e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.691952e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.148525e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.888705e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.888705e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371710e-02 +- 3.270389e-06 )  GeV^0
-TOTAL       :     1.706255 sec
+TOTAL       :     1.710491 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     5,593,795,242      cycles                           #    2.895 GHz                    
-    10,219,614,438      instructions                     #    1.83  insn per cycle         
-       1.989782693 seconds time elapsed
+     5,650,857,001      cycles                           #    2.904 GHz                    
+    10,226,411,017      instructions                     #    1.81  insn per cycle         
+       2.002623091 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288)
 WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288)
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 109
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -95,20 +95,24 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.053223e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.238616e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.238616e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.051342e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.236619e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.236619e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     6.449226 sec
+TOTAL       :     6.467611 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    18,857,985,149      cycles                           #    2.922 GHz                    
-    45,146,976,947      instructions                     #    2.39  insn per cycle         
-       6.455362407 seconds time elapsed
+    18,975,945,343      cycles                           #    2.932 GHz                    
+    45,166,614,913      instructions                     #    2.38  insn per cycle         
+       6.474019296 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  411) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -125,20 +129,24 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.149278e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.209808e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.209808e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.143329e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.199468e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.199468e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.428675 sec
+TOTAL       :     3.443303 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     9,989,397,664      cycles                           #    2.909 GHz                    
-    23,625,198,030      instructions                     #    2.37  insn per cycle         
-       3.434932561 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2:    0) (512y:    0) (512z:    0)
+    10,057,348,114      cycles                           #    2.916 GHz                    
+    23,610,490,289      instructions                     #    2.35  insn per cycle         
+       3.450411330 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -155,20 +163,24 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.297366e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.420681e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.420681e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.288972e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.428534e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.428534e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     3.239437 sec
+TOTAL       :     3.255640 sec
 INFO: No Floating Point Exceptions have been reported
-     9,084,126,388      cycles                           #    2.800 GHz                    
-    16,865,877,588      instructions                     #    1.86  insn per cycle         
-       3.245743617 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2595) (512y:    0) (512z:    0)
+INFO: No Floating Point Exceptions have been reported
+     9,181,255,557      cycles                           #    2.815 GHz                    
+    16,874,424,213      instructions                     #    1.84  insn per cycle         
+       3.262739708 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2565) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -185,20 +197,24 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.345633e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.535854e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.535854e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.308266e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.504995e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.504995e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     3.180459 sec
+TOTAL       :     3.233704 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     8,968,440,697      cycles                           #    2.815 GHz                    
-    16,723,850,550      instructions                     #    1.86  insn per cycle         
-       3.186623646 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2500) (512y:   12) (512z:    0)
+     9,120,657,024      cycles                           #    2.815 GHz                    
+    16,716,849,319      instructions                     #    1.83  insn per cycle         
+       3.240866405 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2462) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -215,20 +231,24 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.312595e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.430024e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.430024e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.329690e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.465437e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.465437e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     3.220913 sec
+TOTAL       :     3.207876 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     7,368,957,264      cycles                           #    2.284 GHz                    
-    14,062,326,131      instructions                     #    1.91  insn per cycle         
-       3.227188579 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1728) (512y:   17) (512z: 1439)
+     7,429,892,192      cycles                           #    2.312 GHz                    
+    14,072,572,968      instructions                     #    1.89  insn per cycle         
+       3.215041865 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1698) (512y:   16) (512z: 1440)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt
index 9524ab2b35..aa8d2ebaf9 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 00s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:39:18
+DATE: 2024-09-18_13:04:44
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.369646e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.161551e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.100782e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.285600e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.265115e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.156209e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371863e-02 +- 3.269951e-06 )  GeV^0
-TOTAL       :     1.208837 sec
+TOTAL       :     1.210437 sec
 INFO: No Floating Point Exceptions have been reported
-     4,108,047,745      cycles                           #    2.875 GHz                    
-     6,574,138,582      instructions                     #    1.60  insn per cycle         
-       1.486544721 seconds time elapsed
+     4,156,256,455      cycles                           #    2.890 GHz                    
+     6,567,216,103      instructions                     #    1.58  insn per cycle         
+       1.494886653 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 109
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.071530e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.264231e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.264231e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.068119e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.263869e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.263869e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371887e-02 +- 3.270267e-06 )  GeV^0
-TOTAL       :     6.589611 sec
+TOTAL       :     6.607111 sec
 INFO: No Floating Point Exceptions have been reported
-    19,260,628,477      cycles                           #    2.921 GHz                    
-    45,180,488,610      instructions                     #    2.35  insn per cycle         
-       6.594714994 seconds time elapsed
+    19,321,982,901      cycles                           #    2.923 GHz                    
+    45,195,162,918      instructions                     #    2.34  insn per cycle         
+       6.612467743 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  411) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.254234e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.437552e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.437552e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.243612e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.428241e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.428241e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371887e-02 +- 3.270266e-06 )  GeV^0
-TOTAL       :     3.508816 sec
+TOTAL       :     3.520615 sec
 INFO: No Floating Point Exceptions have been reported
-    10,282,216,303      cycles                           #    2.927 GHz                    
-    22,372,083,981      instructions                     #    2.18  insn per cycle         
-       3.513966837 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2:    0) (512y:    0) (512z:    0)
+    10,297,430,100      cycles                           #    2.921 GHz                    
+    22,355,563,747      instructions                     #    2.17  insn per cycle         
+       3.526233568 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.407117e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.666910e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.666910e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.394598e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.672540e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.672540e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371885e-02 +- 3.270112e-06 )  GeV^0
-TOTAL       :     3.330770 sec
+TOTAL       :     3.341420 sec
 INFO: No Floating Point Exceptions have been reported
-     9,378,233,860      cycles                           #    2.812 GHz                    
-    15,657,056,788      instructions                     #    1.67  insn per cycle         
-       3.335855333 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2595) (512y:    0) (512z:    0)
+     9,418,657,206      cycles                           #    2.815 GHz                    
+    15,664,231,235      instructions                     #    1.66  insn per cycle         
+       3.347085737 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2565) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.448156e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.779179e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.779179e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.438466e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.794511e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.794511e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371885e-02 +- 3.270112e-06 )  GeV^0
-TOTAL       :     3.294832 sec
+TOTAL       :     3.303984 sec
 INFO: No Floating Point Exceptions have been reported
-     9,310,523,208      cycles                           #    2.823 GHz                    
-    15,310,125,986      instructions                     #    1.64  insn per cycle         
-       3.300035766 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2500) (512y:   12) (512z:    0)
+     9,386,171,386      cycles                           #    2.837 GHz                    
+    15,303,933,132      instructions                     #    1.63  insn per cycle         
+       3.309654062 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2462) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.434912e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.707938e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.707938e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.452484e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.752423e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.752423e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371885e-02 +- 3.270112e-06 )  GeV^0
-TOTAL       :     3.309853 sec
+TOTAL       :     3.289646 sec
 INFO: No Floating Point Exceptions have been reported
-     7,657,370,946      cycles                           #    2.311 GHz                    
-    12,565,328,780      instructions                     #    1.64  insn per cycle         
-       3.314923960 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1728) (512y:   17) (512z: 1439)
+     7,666,750,686      cycles                           #    2.328 GHz                    
+    12,574,987,911      instructions                     #    1.64  insn per cycle         
+       3.295237837 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1698) (512y:   16) (512z: 1440)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt
index ffc392dd39..b1a0ce17a0 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 01s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:36:30
+DATE: 2024-09-18_13:01:55
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.375499e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.177052e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.131045e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.261330e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.330186e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.320632e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
-TOTAL       :     0.860707 sec
+TOTAL       :     0.870896 sec
 INFO: No Floating Point Exceptions have been reported
-     3,119,359,049      cycles                           #    2.875 GHz                    
-     6,403,047,459      instructions                     #    2.05  insn per cycle         
-       1.141875008 seconds time elapsed
+     3,169,388,255      cycles                           #    2.893 GHz                    
+     6,475,497,648      instructions                     #    2.04  insn per cycle         
+       1.152555419 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 109
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.074662e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.267562e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.267562e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.073605e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.268067e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.268067e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     6.224213 sec
+TOTAL       :     6.229587 sec
 INFO: No Floating Point Exceptions have been reported
-    18,239,367,389      cycles                           #    2.929 GHz                    
-    45,000,984,186      instructions                     #    2.47  insn per cycle         
-       6.229329227 seconds time elapsed
+    18,291,168,737      cycles                           #    2.934 GHz                    
+    45,011,612,276      instructions                     #    2.46  insn per cycle         
+       6.234779882 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  411) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.256146e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.437532e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.437532e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.256629e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.445957e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.445957e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.162633 sec
+TOTAL       :     3.163666 sec
 INFO: No Floating Point Exceptions have been reported
-     9,272,693,132      cycles                           #    2.928 GHz                    
-    22,289,563,655      instructions                     #    2.40  insn per cycle         
-       3.167689272 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2:    0) (512y:    0) (512z:    0)
+     9,298,297,527      cycles                           #    2.935 GHz                    
+    22,274,073,872      instructions                     #    2.40  insn per cycle         
+       3.169388774 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.406656e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.671297e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.671297e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.408682e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.697748e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.697748e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.984968 sec
+TOTAL       :     2.982101 sec
 INFO: No Floating Point Exceptions have been reported
-     8,391,394,313      cycles                           #    2.808 GHz                    
-    15,746,990,400      instructions                     #    1.88  insn per cycle         
-       2.990019398 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2595) (512y:    0) (512z:    0)
+     8,431,325,276      cycles                           #    2.823 GHz                    
+    15,754,164,098      instructions                     #    1.87  insn per cycle         
+       2.987763200 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2565) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.456607e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.788447e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.788447e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.431779e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.775747e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.775747e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.931017 sec
+TOTAL       :     2.959110 sec
 INFO: No Floating Point Exceptions have been reported
-     8,255,059,737      cycles                           #    2.812 GHz                    
-    15,603,739,815      instructions                     #    1.89  insn per cycle         
-       2.936040895 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2500) (512y:   12) (512z:    0)
+     8,364,699,070      cycles                           #    2.822 GHz                    
+    15,593,908,028      instructions                     #    1.86  insn per cycle         
+       2.964932423 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2462) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.440909e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.726128e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.726128e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.447600e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.732419e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.732419e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.950959 sec
+TOTAL       :     2.945776 sec
 INFO: No Floating Point Exceptions have been reported
-     6,644,904,192      cycles                           #    2.249 GHz                    
-    12,855,533,735      instructions                     #    1.93  insn per cycle         
-       2.956304259 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1728) (512y:   17) (512z: 1439)
+     6,653,741,333      cycles                           #    2.256 GHz                    
+    12,863,983,012      instructions                     #    1.93  insn per cycle         
+       2.951493425 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1698) (512y:   16) (512z: 1440)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt
index 6f289e010a..981ff690e7 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 00s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:33:44
+DATE: 2024-09-18_12:59:07
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -54,23 +50,26 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.960198e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.134432e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.015289e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.867533e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.208256e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.038855e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371710e-02 +- 3.270389e-06 )  GeV^0
-TOTAL       :     1.499916 sec
+TOTAL       :     1.512988 sec
 INFO: No Floating Point Exceptions have been reported
-     4,978,377,712      cycles                           #    2.897 GHz                    
-     9,174,791,316      instructions                     #    1.84  insn per cycle         
-       1.776745570 seconds time elapsed
+     5,035,172,508      cycles                           #    2.896 GHz                    
+     9,178,648,119      instructions                     #    1.82  insn per cycle         
+       1.796445964 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 109
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -88,20 +87,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.073628e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.267249e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.267249e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.073817e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.267606e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.267606e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     6.228944 sec
+TOTAL       :     6.232302 sec
 INFO: No Floating Point Exceptions have been reported
-    18,251,581,479      cycles                           #    2.928 GHz                    
-    44,998,012,023      instructions                     #    2.47  insn per cycle         
-       6.234003939 seconds time elapsed
+    18,275,461,834      cycles                           #    2.931 GHz                    
+    45,008,664,367      instructions                     #    2.46  insn per cycle         
+       6.237799317 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  411) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -117,20 +119,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.238225e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.397221e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.397221e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.243088e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.444450e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.444450e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.184599 sec
+TOTAL       :     3.181850 sec
 INFO: No Floating Point Exceptions have been reported
-     9,283,360,179      cycles                           #    2.911 GHz                    
-    22,288,296,036      instructions                     #    2.40  insn per cycle         
-       3.189871847 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2:    0) (512y:    0) (512z:    0)
+     9,350,023,781      cycles                           #    2.934 GHz                    
+    22,274,333,552      instructions                     #    2.38  insn per cycle         
+       3.187507510 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -146,20 +151,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.382997e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.617819e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.617819e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.392219e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.668104e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.668104e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     3.013651 sec
+TOTAL       :     2.999184 sec
 INFO: No Floating Point Exceptions have been reported
-     8,377,553,104      cycles                           #    2.776 GHz                    
-    15,745,871,236      instructions                     #    1.88  insn per cycle         
-       3.018832010 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2595) (512y:    0) (512z:    0)
+     8,440,748,249      cycles                           #    2.810 GHz                    
+    15,754,020,269      instructions                     #    1.87  insn per cycle         
+       3.004841956 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2565) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -175,20 +183,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.460372e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.777278e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.777278e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.422328e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.772097e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.772097e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.927250 sec
+TOTAL       :     2.968153 sec
 INFO: No Floating Point Exceptions have been reported
-     8,236,102,004      cycles                           #    2.810 GHz                    
-    15,605,409,431      instructions                     #    1.89  insn per cycle         
-       2.932355101 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2500) (512y:   12) (512z:    0)
+     8,367,700,869      cycles                           #    2.815 GHz                    
+    15,588,459,242      instructions                     #    1.86  insn per cycle         
+       2.973858535 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2462) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +215,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.420493e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.701429e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.701429e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.440851e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.737976e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.737976e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.973248 sec
+TOTAL       :     2.952558 sec
 INFO: No Floating Point Exceptions have been reported
-     6,660,400,903      cycles                           #    2.237 GHz                    
-    12,857,113,210      instructions                     #    1.93  insn per cycle         
-       2.978399312 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1728) (512y:   17) (512z: 1439)
+     6,664,861,082      cycles                           #    2.254 GHz                    
+    12,863,872,119      instructions                     #    1.93  insn per cycle         
+       2.958126027 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1698) (512y:   16) (512z: 1440)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt
index f29bf7a852..5f8c460514 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 49s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_22:46:54
+DATE: 2024-09-18_12:10:07
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.369611e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.203420e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.201295e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.297995e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.821835e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.125593e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
-TOTAL       :     0.574209 sec
+TOTAL       :     0.584822 sec
 INFO: No Floating Point Exceptions have been reported
-     2,289,523,558      cycles                           #    2.867 GHz                    
-     3,616,808,454      instructions                     #    1.58  insn per cycle         
-       0.854990957 seconds time elapsed
+     2,340,511,556      cycles                           #    2.876 GHz                    
+     3,573,310,904      instructions                     #    1.53  insn per cycle         
+       0.872056454 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 95
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 79
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.071743e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.267243e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.267243e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.074691e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.268219e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.268219e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     6.239937 sec
+TOTAL       :     6.225057 sec
 INFO: No Floating Point Exceptions have been reported
-    18,251,320,949      cycles                           #    2.923 GHz                    
-    44,972,609,752      instructions                     #    2.46  insn per cycle         
-       6.244989271 seconds time elapsed
+    18,266,994,357      cycles                           #    2.932 GHz                    
+    44,980,008,303      instructions                     #    2.46  insn per cycle         
+       6.230608513 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  397) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.244827e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.409474e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.409474e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.255829e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.437463e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.437463e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.175872 sec
+TOTAL       :     3.162961 sec
 INFO: No Floating Point Exceptions have been reported
-     9,283,967,387      cycles                           #    2.919 GHz                    
-    22,256,299,457      instructions                     #    2.40  insn per cycle         
-       3.181052612 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2:    0) (512y:    0) (512z:    0)
+     9,315,618,309      cycles                           #    2.941 GHz                    
+    22,235,168,853      instructions                     #    2.39  insn per cycle         
+       3.168519289 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1935) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.411886e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.680552e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.680552e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.414375e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.703911e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.703911e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.977280 sec
+TOTAL       :     2.977811 sec
 INFO: No Floating Point Exceptions have been reported
-     8,365,626,441      cycles                           #    2.806 GHz                    
-    15,740,277,047      instructions                     #    1.88  insn per cycle         
-       2.982795229 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2570) (512y:    0) (512z:    0)
+     8,430,687,956      cycles                           #    2.827 GHz                    
+    15,749,443,583      instructions                     #    1.87  insn per cycle         
+       2.983247205 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2540) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.432222e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.720588e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.720588e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.463260e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.781163e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.781163e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.961844 sec
+TOTAL       :     2.924999 sec
 INFO: No Floating Point Exceptions have been reported
-     8,238,479,740      cycles                           #    2.778 GHz                    
-    15,586,179,769      instructions                     #    1.89  insn per cycle         
-       2.966991606 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2469) (512y:   12) (512z:    0)
+     8,268,651,321      cycles                           #    2.823 GHz                    
+    15,583,986,651      instructions                     #    1.88  insn per cycle         
+       2.930392056 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2438) (512y:   10) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.428069e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.704769e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.704769e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.442819e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.745195e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.745195e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.963513 sec
+TOTAL       :     2.947951 sec
 INFO: No Floating Point Exceptions have been reported
-     6,611,440,096      cycles                           #    2.228 GHz                    
-    12,834,325,416      instructions                     #    1.94  insn per cycle         
-       2.968824892 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1698) (512y:   18) (512z: 1427)
+     6,669,419,569      cycles                           #    2.259 GHz                    
+    12,841,335,089      instructions                     #    1.93  insn per cycle         
+       2.953404356 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1669) (512y:   16) (512z: 1427)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt
index 3ba49685fe..438f6c4f2f 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 30s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:18:27
+DATE: 2024-09-18_12:43:08
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.296010e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.154221e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.150279e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.248809e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.661013e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.608831e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
-TOTAL       :     0.584441 sec
+TOTAL       :     0.584963 sec
 INFO: No Floating Point Exceptions have been reported
-     2,320,419,824      cycles                           #    2.868 GHz                    
-     3,634,414,746      instructions                     #    1.57  insn per cycle         
-       0.867556691 seconds time elapsed
+     2,341,003,807      cycles                           #    2.871 GHz                    
+     3,637,581,249      instructions                     #    1.55  insn per cycle         
+       0.872273356 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 121
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 109
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.632172e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.125712e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.125712e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.610499e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.089750e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.089750e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     4.228730 sec
+TOTAL       :     4.278573 sec
 INFO: No Floating Point Exceptions have been reported
-    12,184,938,847      cycles                           #    2.878 GHz                    
-    32,237,474,232      instructions                     #    2.65  insn per cycle         
-       4.234313345 seconds time elapsed
+    12,205,449,516      cycles                           #    2.850 GHz                    
+    32,295,858,353      instructions                     #    2.65  insn per cycle         
+       4.284066796 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  290) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.666121e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.502284e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.502284e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.650495e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.446725e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.446725e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     2.736132 sec
+TOTAL       :     2.750577 sec
 INFO: No Floating Point Exceptions have been reported
-     8,001,531,537      cycles                           #    2.920 GHz                    
-    18,696,819,081      instructions                     #    2.34  insn per cycle         
-       2.741605566 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1548) (avx2:    0) (512y:    0) (512z:    0)
+     8,071,356,692      cycles                           #    2.929 GHz                    
+    18,687,842,971      instructions                     #    2.32  insn per cycle         
+       2.756173554 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1534) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.780005e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.594822e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.594822e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.785833e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.615036e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.615036e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.634151 sec
+TOTAL       :     2.627067 sec
 INFO: No Floating Point Exceptions have been reported
-     7,439,358,965      cycles                           #    2.819 GHz                    
-    14,242,045,254      instructions                     #    1.91  insn per cycle         
-       2.639862200 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2222) (512y:    0) (512z:    0)
+     7,450,918,918      cycles                           #    2.831 GHz                    
+    14,249,285,643      instructions                     #    1.91  insn per cycle         
+       2.632635594 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2234) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.837633e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.742018e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.742018e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.828862e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.718189e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.718189e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.586770 sec
+TOTAL       :     2.597300 sec
 INFO: No Floating Point Exceptions have been reported
-     7,296,042,539      cycles                           #    2.815 GHz                    
-    13,934,395,120      instructions                     #    1.91  insn per cycle         
-       2.592340781 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2074) (512y:    3) (512z:    0)
+     7,335,966,912      cycles                           #    2.820 GHz                    
+    13,949,163,288      instructions                     #    1.90  insn per cycle         
+       2.602858413 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2087) (512y:    3) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.455305e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.802315e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.802315e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.491639e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.833175e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.833175e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.939990 sec
+TOTAL       :     2.900511 sec
 INFO: No Floating Point Exceptions have been reported
-     6,572,892,326      cycles                           #    2.232 GHz                    
-    13,414,317,144      instructions                     #    2.04  insn per cycle         
-       2.945482478 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2056) (512y:    1) (512z: 1197)
+     6,563,891,996      cycles                           #    2.259 GHz                    
+    13,436,075,613      instructions                     #    2.05  insn per cycle         
+       2.906157600 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2073) (512y:    1) (512z: 1201)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt
index 3bb46f222b..2bd01da79a 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 30s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_23:18:51
+DATE: 2024-09-18_12:43:31
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.296037e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.166988e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.207168e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.260194e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.691839e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.932675e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371687e-02 +- 3.270220e-06 )  GeV^0
-TOTAL       :     0.580765 sec
+TOTAL       :     0.585616 sec
 INFO: No Floating Point Exceptions have been reported
-     2,332,012,513      cycles                           #    2.872 GHz                    
-     3,668,769,216      instructions                     #    1.57  insn per cycle         
-       0.868538079 seconds time elapsed
+     2,337,485,665      cycles                           #    2.875 GHz                    
+     3,652,863,320      instructions                     #    1.56  insn per cycle         
+       0.871732359 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 95
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 79
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.207934e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.220486e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.220486e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.208067e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.235106e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.235106e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     3.220883 sec
+TOTAL       :     3.219491 sec
 INFO: No Floating Point Exceptions have been reported
-     9,370,617,522      cycles                           #    2.905 GHz                    
-    25,652,019,955      instructions                     #    2.74  insn per cycle         
-       3.226474616 seconds time elapsed
+     9,405,085,609      cycles                           #    2.917 GHz                    
+    25,703,807,777      instructions                     #    2.73  insn per cycle         
+       3.224847546 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  243) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.997177e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.535513e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.535513e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.972603e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.428852e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.428852e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371707e-02 +- 3.270376e-06 )  GeV^0
-TOTAL       :     2.476192 sec
+TOTAL       :     2.491785 sec
 INFO: No Floating Point Exceptions have been reported
-     7,229,355,154      cycles                           #    2.914 GHz                    
-    16,867,444,320      instructions                     #    2.33  insn per cycle         
-       2.481841403 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1350) (avx2:    0) (512y:    0) (512z:    0)
+     7,313,494,275      cycles                           #    2.930 GHz                    
+    16,767,205,281      instructions                     #    2.29  insn per cycle         
+       2.497135576 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1311) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.936522e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.033228e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.033228e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.941057e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.047750e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.047750e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.509294 sec
+TOTAL       :     2.511970 sec
 INFO: No Floating Point Exceptions have been reported
-     7,122,962,556      cycles                           #    2.833 GHz                    
-    13,619,646,135      instructions                     #    1.91  insn per cycle         
-       2.514911103 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2046) (512y:    0) (512z:    0)
+     7,127,612,921      cycles                           #    2.833 GHz                    
+    13,657,719,583      instructions                     #    1.92  insn per cycle         
+       2.517264213 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2067) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.973129e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.132874e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.132874e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.994854e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.186874e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.186874e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270341e-06 )  GeV^0
-TOTAL       :     2.485889 sec
+TOTAL       :     2.472132 sec
 INFO: No Floating Point Exceptions have been reported
-     7,022,473,336      cycles                           #    2.819 GHz                    
-    13,427,556,193      instructions                     #    1.91  insn per cycle         
-       2.491467170 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1927) (512y:    4) (512z:    0)
+     7,033,406,697      cycles                           #    2.840 GHz                    
+    13,451,133,295      instructions                     #    1.91  insn per cycle         
+       2.477643200 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1935) (512y:    7) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.610878e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.134667e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.134667e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.610829e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.126124e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.126124e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270342e-06 )  GeV^0
-TOTAL       :     2.785068 sec
+TOTAL       :     2.783843 sec
 INFO: No Floating Point Exceptions have been reported
-     6,337,623,889      cycles                           #    2.272 GHz                    
-    13,143,208,474      instructions                     #    2.07  insn per cycle         
-       2.790716425 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2012) (512y:    1) (512z: 1083)
+     6,358,284,694      cycles                           #    2.280 GHz                    
+    13,173,247,957      instructions                     #    2.07  insn per cycle         
+       2.789438831 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2039) (512y:    2) (512z: 1081)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
index 28937a024f..041f4e9efd 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 55s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_22:47:21
+DATE: 2024-09-18_12:10:34
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.818500e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.703571e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.142047e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.877042e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.647728e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.852998e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.680200 sec
+TOTAL       :     0.677390 sec
 INFO: No Floating Point Exceptions have been reported
-     2,612,649,294      cycles                           #    2.867 GHz                    
-     4,020,862,265      instructions                     #    1.54  insn per cycle         
-       0.973408337 seconds time elapsed
+     2,627,954,813      cycles                           #    2.876 GHz                    
+     4,055,520,615      instructions                     #    1.54  insn per cycle         
+       0.972709824 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.013555e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.178119e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.178119e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.020205e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.187124e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.187124e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.618295 sec
+TOTAL       :     6.577075 sec
 INFO: No Floating Point Exceptions have been reported
-    19,347,235,556      cycles                           #    2.921 GHz                    
-    46,262,872,265      instructions                     #    2.39  insn per cycle         
-       6.623644316 seconds time elapsed
+    19,371,933,844      cycles                           #    2.944 GHz                    
+    46,278,733,907      instructions                     #    2.39  insn per cycle         
+       6.582537613 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  466) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.622765e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.131585e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.131585e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.635520e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.155996e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.155996e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.292508 sec
+TOTAL       :     4.262104 sec
 INFO: No Floating Point Exceptions have been reported
-    12,523,530,255      cycles                           #    2.914 GHz                    
-    31,480,155,703      instructions                     #    2.51  insn per cycle         
-       4.298007948 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1720) (avx2:    0) (512y:    0) (512z:    0)
+    12,531,950,606      cycles                           #    2.937 GHz                    
+    31,465,132,198      instructions                     #    2.51  insn per cycle         
+       4.267832274 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1731) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.975666e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.736981e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.736981e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.976062e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.756066e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.756066e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.591492 sec
+TOTAL       :     3.592683 sec
 INFO: No Floating Point Exceptions have been reported
-    10,042,173,345      cycles                           #    2.793 GHz                    
-    19,471,578,015      instructions                     #    1.94  insn per cycle         
-       3.596768981 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2123) (512y:    0) (512z:    0)
+    10,114,837,946      cycles                           #    2.812 GHz                    
+    19,479,113,850      instructions                     #    1.93  insn per cycle         
+       3.598394582 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2045) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.012657e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.805102e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.805102e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.011048e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.815376e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.815376e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.533521 sec
+TOTAL       :     3.535103 sec
 INFO: No Floating Point Exceptions have been reported
-     9,885,722,561      cycles                           #    2.794 GHz                    
-    19,217,935,631      instructions                     #    1.94  insn per cycle         
-       3.538804173 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1866) (512y:  189) (512z:    0)
+     9,996,837,570      cycles                           #    2.824 GHz                    
+    19,291,566,393      instructions                     #    1.93  insn per cycle         
+       3.540686440 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1799) (512y:  188) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.788615e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.387860e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.387860e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.782393e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.383775e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.383775e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.929809 sec
+TOTAL       :     3.939537 sec
 INFO: No Floating Point Exceptions have been reported
-     8,344,069,760      cycles                           #    2.121 GHz                    
-    15,055,063,349      instructions                     #    1.80  insn per cycle         
-       3.935283980 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1044) (512y:  154) (512z: 1321)
+     8,379,017,732      cycles                           #    2.125 GHz                    
+    15,108,594,606      instructions                     #    1.80  insn per cycle         
+       3.945372714 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  966) (512y:  154) (512z: 1330)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt
index 5d9a67393b..63e5511d98 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 49s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
 
-DATE: 2024-08-29_22:47:51
+DATE: 2024-09-18_12:11:04
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.811602e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.711473e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.162788e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.941580e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.659467e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.829628e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     0.682237 sec
+TOTAL       :     0.674748 sec
 INFO: No Floating Point Exceptions have been reported
-     2,620,474,570      cycles                           #    2.866 GHz                    
-     4,036,897,619      instructions                     #    1.54  insn per cycle         
-       0.975308933 seconds time elapsed
+     2,621,919,128      cycles                           #    2.880 GHz                    
+     4,081,332,751      instructions                     #    1.56  insn per cycle         
+       0.969735396 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.011729e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.174976e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.174976e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.022324e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.188868e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.188868e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     6.629920 sec
+TOTAL       :     6.564992 sec
 INFO: No Floating Point Exceptions have been reported
-    19,385,164,241      cycles                           #    2.922 GHz                    
-    46,202,402,296      instructions                     #    2.38  insn per cycle         
-       6.635438882 seconds time elapsed
+    19,266,332,416      cycles                           #    2.933 GHz                    
+    46,212,690,278      instructions                     #    2.40  insn per cycle         
+       6.570664425 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  453) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.611858e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.122967e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.122967e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.631635e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.147723e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.147723e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     4.319506 sec
+TOTAL       :     4.273260 sec
 INFO: No Floating Point Exceptions have been reported
-    12,604,008,704      cycles                           #    2.915 GHz                    
-    31,454,601,193      instructions                     #    2.50  insn per cycle         
-       4.324876632 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1712) (avx2:    0) (512y:    0) (512z:    0)
+    12,565,193,084      cycles                           #    2.937 GHz                    
+    31,464,303,429      instructions                     #    2.50  insn per cycle         
+       4.278983280 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1724) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.970707e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.735506e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.735506e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.965569e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.737055e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.737055e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.603419 sec
+TOTAL       :     3.608853 sec
 INFO: No Floating Point Exceptions have been reported
-    10,044,338,252      cycles                           #    2.785 GHz                    
-    19,460,257,886      instructions                     #    1.94  insn per cycle         
-       3.608852495 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2107) (512y:    0) (512z:    0)
+    10,149,451,908      cycles                           #    2.809 GHz                    
+    19,494,245,478      instructions                     #    1.92  insn per cycle         
+       3.614638314 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2036) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.982213e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.748665e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.748665e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.020584e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.826510e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.826510e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.584722 sec
+TOTAL       :     3.523442 sec
 INFO: No Floating Point Exceptions have been reported
-     9,883,464,828      cycles                           #    2.754 GHz                    
-    19,283,001,699      instructions                     #    1.95  insn per cycle         
-       3.590131099 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1860) (512y:  189) (512z:    0)
+     9,922,226,767      cycles                           #    2.813 GHz                    
+    19,194,396,105      instructions                     #    1.93  insn per cycle         
+       3.529032291 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1766) (512y:  191) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.814525e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.432144e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.432144e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.850816e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.505094e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.505094e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.371706e-02 +- 3.270315e-06 )  GeV^0
-TOTAL       :     3.878171 sec
+TOTAL       :     3.808968 sec
 INFO: No Floating Point Exceptions have been reported
-     8,218,463,055      cycles                           #    2.117 GHz                    
-    14,967,397,857      instructions                     #    1.82  insn per cycle         
-       3.883633012 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1037) (512y:  156) (512z: 1305)
+     8,221,926,837      cycles                           #    2.156 GHz                    
+    14,966,457,412      instructions                     #    1.82  insn per cycle         
+       3.814643788 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  959) (512y:  155) (512z: 1296)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
index bf281d75b3..d77862b8c7 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 26s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_22:48:22
+DATE: 2024-09-18_12:11:34
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.725749e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.169025e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.280824e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.432691e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.350673e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.001727e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.527233 sec
+TOTAL       :     0.536474 sec
 INFO: No Floating Point Exceptions have been reported
-     2,173,008,163      cycles                           #    2.867 GHz                    
-     3,142,274,275      instructions                     #    1.45  insn per cycle         
-       0.816679974 seconds time elapsed
+     2,210,506,804      cycles                           #    2.873 GHz                    
+     3,172,337,100      instructions                     #    1.44  insn per cycle         
+       0.829286366 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.816184e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.863096e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.863096e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.830003e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.876741e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.876741e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.880537 sec
+TOTAL       :     5.837147 sec
 INFO: No Floating Point Exceptions have been reported
-    17,173,522,079      cycles                           #    2.918 GHz                    
-    45,940,447,501      instructions                     #    2.68  insn per cycle         
-       5.886318114 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
+    17,232,906,357      cycles                           #    2.950 GHz                    
+    45,930,941,627      instructions                     #    2.67  insn per cycle         
+       5.842851386 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  617) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.164036e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.319146e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.319146e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.213968e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.373677e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.373677e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.420599 sec
+TOTAL       :     3.369805 sec
 INFO: No Floating Point Exceptions have been reported
-    10,005,630,694      cycles                           #    2.921 GHz                    
-    27,835,659,377      instructions                     #    2.78  insn per cycle         
-       3.426171105 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
+     9,944,028,092      cycles                           #    2.947 GHz                    
+    27,848,243,801      instructions                     #    2.80  insn per cycle         
+       3.375396234 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.985768e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.359051e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.359051e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.005348e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.393032e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.393032e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.209915 sec
+TOTAL       :     2.203017 sec
 INFO: No Floating Point Exceptions have been reported
-     6,062,740,815      cycles                           #    2.738 GHz                    
-    12,577,042,076      instructions                     #    2.07  insn per cycle         
-       2.215350137 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
+     6,092,356,881      cycles                           #    2.759 GHz                    
+    12,580,147,933      instructions                     #    2.06  insn per cycle         
+       2.208781826 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2619) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.474101e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.932150e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.932150e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.533405e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.010418e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.010418e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.022046 sec
+TOTAL       :     2.002945 sec
 INFO: No Floating Point Exceptions have been reported
-     5,565,418,035      cycles                           #    2.746 GHz                    
-    12,015,644,834      instructions                     #    2.16  insn per cycle         
-       2.027619027 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
+     5,570,120,100      cycles                           #    2.774 GHz                    
+    12,019,792,186      instructions                     #    2.16  insn per cycle         
+       2.008867487 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2360) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.500041e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.680913e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.680913e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.539179e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.725857e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.725857e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.102703 sec
+TOTAL       :     3.069457 sec
 INFO: No Floating Point Exceptions have been reported
-     5,686,124,170      cycles                           #    1.830 GHz                    
-     8,290,168,560      instructions                     #    1.46  insn per cycle         
-       3.108444392 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
+     5,709,813,977      cycles                           #    1.857 GHz                    
+     8,292,916,903      instructions                     #    1.45  insn per cycle         
+       3.075340516 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1441) (512y:  122) (512z: 1802)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt
index 3c6e1bbc89..ac7eb7abb8 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 14s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:28:10
+DATE: 2024-09-18_12:52:59
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,15 +53,16 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.468536e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.988607e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.988607e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.492890e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.985153e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.985153e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.819103 sec
+TOTAL       :     0.825573 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     3,040,074,436      cycles                           #    2.872 GHz                    
-     4,744,943,802      instructions                     #    1.56  insn per cycle         
-       1.117026479 seconds time elapsed
+     3,089,630,348      cycles                           #    2.884 GHz                    
+     4,704,003,879      instructions                     #    1.52  insn per cycle         
+       1.129956624 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -76,7 +73,10 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -95,20 +95,24 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.813891e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.859845e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.859845e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.810470e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.856841e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.856841e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.965795 sec
+TOTAL       :     5.979891 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    17,492,314,151      cycles                           #    2.929 GHz                    
-    46,000,593,104      instructions                     #    2.63  insn per cycle         
-       5.972479390 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
+    17,636,224,981      cycles                           #    2.947 GHz                    
+    46,002,491,255      instructions                     #    2.61  insn per cycle         
+       5.986641580 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  617) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -125,20 +129,24 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.138423e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.289757e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.289757e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.162709e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.318081e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.318081e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.527164 sec
+TOTAL       :     3.508343 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    10,339,180,979      cycles                           #    2.927 GHz                    
-    28,019,312,732      instructions                     #    2.71  insn per cycle         
-       3.533781604 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
+    10,301,947,786      cycles                           #    2.931 GHz                    
+    28,031,926,381      instructions                     #    2.72  insn per cycle         
+       3.516023780 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -155,20 +163,24 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.894274e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.257355e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.257355e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.911481e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.286736e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.286736e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.331879 sec
+TOTAL       :     2.328474 sec
 INFO: No Floating Point Exceptions have been reported
-     6,416,928,462      cycles                           #    2.745 GHz                    
-    12,863,992,862      instructions                     #    2.00  insn per cycle         
-       2.338617466 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
+INFO: No Floating Point Exceptions have been reported
+     6,480,879,664      cycles                           #    2.775 GHz                    
+    12,869,228,758      instructions                     #    1.99  insn per cycle         
+       2.336129053 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2619) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -185,20 +197,24 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.396753e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.845240e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.845240e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.391900e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.844641e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.844641e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.129662 sec
+TOTAL       :     2.137350 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     5,897,393,565      cycles                           #    2.762 GHz                    
-    12,300,032,879      instructions                     #    2.09  insn per cycle         
-       2.136373131 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
+     5,935,186,233      cycles                           #    2.768 GHz                    
+    12,309,185,637      instructions                     #    2.07  insn per cycle         
+       2.144981542 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2360) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -215,20 +231,24 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.467289e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.644923e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.644923e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.478793e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.660239e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.660239e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.212063 sec
+TOTAL       :     3.208412 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     6,045,170,156      cycles                           #    1.879 GHz                    
-     8,535,674,778      instructions                     #    1.41  insn per cycle         
-       3.218783444 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
+     6,086,695,352      cycles                           #    1.893 GHz                    
+     8,539,357,346      instructions                     #    1.40  insn per cycle         
+       3.215882461 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1441) (512y:  122) (512z: 1802)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt
index 444ccae050..43a1422029 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 01s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:39:47
+DATE: 2024-09-18_13:05:13
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.724255e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.167416e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.278497e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.294862e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.316742e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.978216e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     0.623921 sec
+TOTAL       :     0.632097 sec
 INFO: No Floating Point Exceptions have been reported
-     2,441,535,187      cycles                           #    2.863 GHz                    
-     3,583,960,573      instructions                     #    1.47  insn per cycle         
-       0.909280119 seconds time elapsed
+     2,509,027,611      cycles                           #    2.881 GHz                    
+     3,623,648,413      instructions                     #    1.44  insn per cycle         
+       0.928416005 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.823676e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.870580e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.870580e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.823170e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.870043e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.870043e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     5.917583 sec
+TOTAL       :     5.922002 sec
 INFO: No Floating Point Exceptions have been reported
-    17,344,273,898      cycles                           #    2.929 GHz                    
-    45,956,921,757      instructions                     #    2.65  insn per cycle         
-       5.922963627 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
+    17,445,049,338      cycles                           #    2.943 GHz                    
+    45,950,504,380      instructions                     #    2.63  insn per cycle         
+       5.927754556 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  617) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.157376e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.311663e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.311663e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.206424e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.368310e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.368310e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     3.489223 sec
+TOTAL       :     3.440040 sec
 INFO: No Floating Point Exceptions have been reported
-    10,185,833,098      cycles                           #    2.916 GHz                    
-    27,834,944,277      instructions                     #    2.73  insn per cycle         
-       3.494676127 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
+    10,134,263,801      cycles                           #    2.942 GHz                    
+    27,846,437,463      instructions                     #    2.75  insn per cycle         
+       3.446069209 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.993976e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.367912e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.367912e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.972916e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.355947e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.355947e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     2.267251 sec
+TOTAL       :     2.278924 sec
 INFO: No Floating Point Exceptions have been reported
-     6,259,034,691      cycles                           #    2.755 GHz                    
-    12,559,540,754      instructions                     #    2.01  insn per cycle         
-       2.272735159 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
+     6,293,574,887      cycles                           #    2.756 GHz                    
+    12,563,410,868      instructions                     #    2.00  insn per cycle         
+       2.284852020 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2619) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.467063e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.922468e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.922468e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.484228e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.952695e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.952695e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     2.086090 sec
+TOTAL       :     2.082886 sec
 INFO: No Floating Point Exceptions have been reported
-     5,749,773,653      cycles                           #    2.750 GHz                    
-    11,964,142,779      instructions                     #    2.08  insn per cycle         
-       2.091530563 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
+     5,796,540,715      cycles                           #    2.776 GHz                    
+    11,970,685,605      instructions                     #    2.07  insn per cycle         
+       2.088838177 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2360) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.501169e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.680263e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.680263e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.533537e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.719277e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.719277e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079401e+00 +- 3.402993e-03 )  GeV^0
-TOTAL       :     3.164368 sec
+TOTAL       :     3.139756 sec
 INFO: No Floating Point Exceptions have been reported
-     5,872,294,086      cycles                           #    1.853 GHz                    
-     8,241,399,099      instructions                     #    1.40  insn per cycle         
-       3.169920538 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
+     5,897,468,368      cycles                           #    1.875 GHz                    
+     8,242,833,828      instructions                     #    1.40  insn per cycle         
+       3.145931095 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1441) (512y:  122) (512z: 1802)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt
index 811ce8dae0..4d0fbacd91 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 00s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:36:57
+DATE: 2024-09-18_13:02:22
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.722197e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.167128e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.278372e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.273490e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.332144e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.976419e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.563802 sec
+TOTAL       :     0.567405 sec
 INFO: No Floating Point Exceptions have been reported
-     2,262,716,835      cycles                           #    2.865 GHz                    
-     3,565,491,409      instructions                     #    1.58  insn per cycle         
-       0.848472383 seconds time elapsed
+     2,317,717,097      cycles                           #    2.884 GHz                    
+     3,603,757,599      instructions                     #    1.55  insn per cycle         
+       0.860500177 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.813802e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.860264e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.860264e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.822880e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.870340e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.870340e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.887032 sec
+TOTAL       :     5.861471 sec
 INFO: No Floating Point Exceptions have been reported
-    17,158,565,283      cycles                           #    2.913 GHz                    
-    45,937,142,543      instructions                     #    2.68  insn per cycle         
-       5.892364010 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
+    17,231,534,650      cycles                           #    2.937 GHz                    
+    45,932,602,629      instructions                     #    2.67  insn per cycle         
+       5.867633947 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  617) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.164067e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.318693e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.318693e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.195694e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.355589e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.355589e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.421238 sec
+TOTAL       :     3.390529 sec
 INFO: No Floating Point Exceptions have been reported
-    10,010,510,882      cycles                           #    2.922 GHz                    
-    27,835,852,414      instructions                     #    2.78  insn per cycle         
-       3.426638440 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
+     9,982,800,981      cycles                           #    2.940 GHz                    
+    27,850,403,920      instructions                     #    2.79  insn per cycle         
+       3.396205784 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.987212e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.363161e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.363161e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.027954e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.417032e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.417032e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.209515 sec
+TOTAL       :     2.193317 sec
 INFO: No Floating Point Exceptions have been reported
-     6,080,580,168      cycles                           #    2.747 GHz                    
-    12,577,088,822      instructions                     #    2.07  insn per cycle         
-       2.214907728 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
+     6,072,828,383      cycles                           #    2.762 GHz                    
+    12,580,752,660      instructions                     #    2.07  insn per cycle         
+       2.199291134 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2619) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.476364e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.932446e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.932446e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.459397e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.922046e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.922046e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.020917 sec
+TOTAL       :     2.029275 sec
 INFO: No Floating Point Exceptions have been reported
-     5,548,645,581      cycles                           #    2.739 GHz                    
-    12,014,896,755      instructions                     #    2.17  insn per cycle         
-       2.026414780 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
+     5,590,179,855      cycles                           #    2.748 GHz                    
+    12,020,448,396      instructions                     #    2.15  insn per cycle         
+       2.035272328 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2360) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.468179e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.645722e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.645722e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.512487e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.697486e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.697486e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.129420 sec
+TOTAL       :     3.093387 sec
 INFO: No Floating Point Exceptions have been reported
-     5,691,542,603      cycles                           #    1.816 GHz                    
-     8,292,662,387      instructions                     #    1.46  insn per cycle         
-       3.134892980 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
+     5,720,535,090      cycles                           #    1.846 GHz                    
+     8,293,713,066      instructions                     #    1.45  insn per cycle         
+       3.099385086 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1441) (512y:  122) (512z: 1802)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt
index d3b910a217..06cd2419c8 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 01s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:34:11
+DATE: 2024-09-18_12:59:34
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -54,15 +50,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.834082e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.166063e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.277877e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.799214e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.349130e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.974876e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.710983 sec
+TOTAL       :     0.720471 sec
 INFO: No Floating Point Exceptions have been reported
-     2,705,219,930      cycles                           #    2.879 GHz                    
-     4,282,979,112      instructions                     #    1.58  insn per cycle         
-       0.996465503 seconds time elapsed
+     2,751,096,444      cycles                           #    2.885 GHz                    
+     4,339,626,159      instructions                     #    1.58  insn per cycle         
+       1.011336555 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
@@ -70,7 +66,10 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -88,20 +87,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.818437e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.864880e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.864880e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.816637e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.863801e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.863801e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.872383 sec
+TOTAL       :     5.879265 sec
 INFO: No Floating Point Exceptions have been reported
-    17,160,050,681      cycles                           #    2.920 GHz                    
-    45,940,289,918      instructions                     #    2.68  insn per cycle         
-       5.877757334 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
+    17,261,479,917      cycles                           #    2.934 GHz                    
+    45,935,121,768      instructions                     #    2.66  insn per cycle         
+       5.884988360 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  617) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -117,20 +119,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.169677e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.325030e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.325030e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.202828e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.362707e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.362707e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.414466 sec
+TOTAL       :     3.382024 sec
 INFO: No Floating Point Exceptions have been reported
-    10,011,601,048      cycles                           #    2.928 GHz                    
-    27,837,161,782      instructions                     #    2.78  insn per cycle         
-       3.419970240 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2:    0) (512y:    0) (512z:    0)
+     9,945,427,320      cycles                           #    2.936 GHz                    
+    27,847,352,314      instructions                     #    2.80  insn per cycle         
+       3.387994978 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -146,20 +151,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.899817e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.264552e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.264552e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.949448e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.331919e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.331919e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.246785 sec
+TOTAL       :     2.228024 sec
 INFO: No Floating Point Exceptions have been reported
-     6,167,664,851      cycles                           #    2.739 GHz                    
-    12,576,881,625      instructions                     #    2.04  insn per cycle         
-       2.252386496 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2612) (512y:    0) (512z:    0)
+     6,117,137,090      cycles                           #    2.739 GHz                    
+    12,580,569,234      instructions                     #    2.06  insn per cycle         
+       2.234097878 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2619) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -175,20 +183,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.481532e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.938711e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.938711e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.342003e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.785664e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.785664e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.019702 sec
+TOTAL       :     2.072519 sec
 INFO: No Floating Point Exceptions have been reported
-     5,565,720,220      cycles                           #    2.749 GHz                    
-    12,015,098,672      instructions                     #    2.16  insn per cycle         
-       2.025137451 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2350) (512y:  144) (512z:    0)
+     5,591,470,515      cycles                           #    2.691 GHz                    
+    12,020,476,993      instructions                     #    2.15  insn per cycle         
+       2.078517041 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2360) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +215,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.505249e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.686404e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.686404e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.530876e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.717281e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.717281e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.097156 sec
+TOTAL       :     3.077997 sec
 INFO: No Floating Point Exceptions have been reported
-     5,693,059,407      cycles                           #    1.835 GHz                    
-     8,290,060,020      instructions                     #    1.46  insn per cycle         
-       3.102640874 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:  122) (512z: 1801)
+     5,702,073,376      cycles                           #    1.850 GHz                    
+     8,294,780,221      instructions                     #    1.45  insn per cycle         
+       3.083993360 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1441) (512y:  122) (512z: 1802)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt
index 8c4732a972..a4f203143e 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 49s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_22:48:47
+DATE: 2024-09-18_12:11:59
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.725379e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.168865e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.278488e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.820817e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.978279e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.339111e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.526687 sec
+TOTAL       :     0.700005 sec
 INFO: No Floating Point Exceptions have been reported
-     2,160,622,776      cycles                           #    2.845 GHz                    
-     3,135,821,431      instructions                     #    1.45  insn per cycle         
-       0.815922370 seconds time elapsed
+     2,762,648,255      cycles                           #    2.857 GHz                    
+     3,086,101,973      instructions                     #    1.12  insn per cycle         
+       1.026825767 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 208
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 212
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.868877e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.918216e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.918216e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.875216e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.924982e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.924982e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.716529 sec
+TOTAL       :     5.700425 sec
 INFO: No Floating Point Exceptions have been reported
-    16,714,662,218      cycles                           #    2.922 GHz                    
-    44,929,451,831      instructions                     #    2.69  insn per cycle         
-       5.721855786 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  568) (avx2:    0) (512y:    0) (512z:    0)
+    16,757,702,666      cycles                           #    2.937 GHz                    
+    44,923,641,547      instructions                     #    2.68  insn per cycle         
+       5.706326125 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  567) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.323426e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.496662e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.496662e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.370762e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.546946e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.546946e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.263820 sec
+TOTAL       :     3.219225 sec
 INFO: No Floating Point Exceptions have been reported
-     9,527,140,435      cycles                           #    2.915 GHz                    
-    26,694,078,018      instructions                     #    2.80  insn per cycle         
-       3.269292343 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2331) (avx2:    0) (512y:    0) (512z:    0)
+     9,494,791,570      cycles                           #    2.945 GHz                    
+    26,687,379,503      instructions                     #    2.81  insn per cycle         
+       3.225069589 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2327) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.562467e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.880552e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.880552e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.607569e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.929909e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.929909e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.405499 sec
+TOTAL       :     2.383333 sec
 INFO: No Floating Point Exceptions have been reported
-     6,601,230,669      cycles                           #    2.739 GHz                    
-    14,116,197,972      instructions                     #    2.14  insn per cycle         
-       2.410986894 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2703) (512y:    0) (512z:    0)
+     6,604,949,302      cycles                           #    2.766 GHz                    
+    14,119,001,234      instructions                     #    2.14  insn per cycle         
+       2.388928721 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2711) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.773473e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.112833e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.112833e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.803756e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.157173e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.157173e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.302668 sec
+TOTAL       :     2.289999 sec
 INFO: No Floating Point Exceptions have been reported
-     6,319,983,051      cycles                           #    2.739 GHz                    
-    13,699,915,851      instructions                     #    2.17  insn per cycle         
-       2.308121633 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2349) (512y:  297) (512z:    0)
+     6,348,634,731      cycles                           #    2.767 GHz                    
+    13,715,767,912      instructions                     #    2.16  insn per cycle         
+       2.295499005 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2360) (512y:  298) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.364540e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.530799e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.530799e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.387276e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.557456e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.557456e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.222914 sec
+TOTAL       :     3.203296 sec
 INFO: No Floating Point Exceptions have been reported
-     5,894,748,475      cycles                           #    1.827 GHz                    
-    10,058,448,006      instructions                     #    1.71  insn per cycle         
-       3.228425871 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1261) (512y:  208) (512z: 1987)
+     5,911,433,799      cycles                           #    1.843 GHz                    
+    10,058,967,230      instructions                     #    1.70  insn per cycle         
+       3.209029605 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1273) (512y:  208) (512z: 1988)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt
index d808eb7b63..797e37fdb1 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 31s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:19:12
+DATE: 2024-09-18_12:43:53
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.580953e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.164243e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.279779e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.310192e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.359217e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.986325e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.528023 sec
+TOTAL       :     0.536645 sec
 INFO: No Floating Point Exceptions have been reported
-     2,213,384,103      cycles                           #    2.873 GHz                    
-     3,159,522,648      instructions                     #    1.43  insn per cycle         
-       0.827781723 seconds time elapsed
+     2,216,199,851      cycles                           #    2.870 GHz                    
+     3,159,776,582      instructions                     #    1.43  insn per cycle         
+       0.831121874 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.417645e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.500903e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.500903e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.421869e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.505515e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.505515e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     4.443506 sec
+TOTAL       :     4.438528 sec
 INFO: No Floating Point Exceptions have been reported
-    12,992,900,472      cycles                           #    2.921 GHz                    
-    34,329,063,073      instructions                     #    2.64  insn per cycle         
-       4.449379965 seconds time elapsed
+    13,015,204,187      cycles                           #    2.929 GHz                    
+    34,341,759,533      instructions                     #    2.64  insn per cycle         
+       4.444441151 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  665) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.973650e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.110657e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.110657e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.982901e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.119934e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.119934e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.634011 sec
+TOTAL       :     3.624625 sec
 INFO: No Floating Point Exceptions have been reported
-    10,678,102,516      cycles                           #    2.934 GHz                    
-    23,998,547,916      instructions                     #    2.25  insn per cycle         
-       3.639975393 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2571) (avx2:    0) (512y:    0) (512z:    0)
+    10,679,803,279      cycles                           #    2.942 GHz                    
+    24,245,188,333      instructions                     #    2.27  insn per cycle         
+       3.630600501 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2610) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.579404e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.901013e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.901013e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.555816e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.876140e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.876140e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.398468 sec
+TOTAL       :     2.410266 sec
 INFO: No Floating Point Exceptions have been reported
-     6,583,807,996      cycles                           #    2.739 GHz                    
-    12,341,554,114      instructions                     #    1.87  insn per cycle         
-       2.404741988 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3096) (512y:    0) (512z:    0)
+     6,676,895,845      cycles                           #    2.765 GHz                    
+    12,404,391,789      instructions                     #    1.86  insn per cycle         
+       2.415872101 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3115) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.932760e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.298403e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.298403e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.932497e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.306284e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.306284e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.233613 sec
+TOTAL       :     2.233356 sec
 INFO: No Floating Point Exceptions have been reported
-     6,166,393,584      cycles                           #    2.754 GHz                    
-    11,564,413,084      instructions                     #    1.88  insn per cycle         
-       2.239534572 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2640) (512y:  239) (512z:    0)
+     6,172,218,152      cycles                           #    2.758 GHz                    
+    11,544,853,425      instructions                     #    1.87  insn per cycle         
+       2.239017897 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2644) (512y:  239) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.724843e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.929225e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.929225e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.760390e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.970863e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.970863e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.922222 sec
+TOTAL       :     2.895519 sec
 INFO: No Floating Point Exceptions have been reported
-     5,395,079,092      cycles                           #    1.844 GHz                    
-     9,281,282,823      instructions                     #    1.72  insn per cycle         
-       2.928247598 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2084) (512y:  282) (512z: 1954)
+     5,386,476,820      cycles                           #    1.857 GHz                    
+     9,291,001,680      instructions                     #    1.72  insn per cycle         
+       2.901312030 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2099) (512y:  282) (512z: 1958)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt
index 5ff2869e2d..af0c8fa098 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 27s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:19:36
+DATE: 2024-09-18_12:44:17
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.561341e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.161596e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.277122e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.294016e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.195619e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.822974e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.531876 sec
+TOTAL       :     0.537359 sec
 INFO: No Floating Point Exceptions have been reported
-     2,190,984,202      cycles                           #    2.865 GHz                    
-     3,162,138,864      instructions                     #    1.44  insn per cycle         
-       0.823306138 seconds time elapsed
+     2,212,895,393      cycles                           #    2.861 GHz                    
+     3,167,520,059      instructions                     #    1.43  insn per cycle         
+       0.832101772 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 208
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 212
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.555546e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.648822e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.648822e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.565164e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.657330e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.657330e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     4.209735 sec
+TOTAL       :     4.194388 sec
 INFO: No Floating Point Exceptions have been reported
-    12,311,073,638      cycles                           #    2.921 GHz                    
-    34,900,214,419      instructions                     #    2.83  insn per cycle         
-       4.215528854 seconds time elapsed
+    12,320,787,698      cycles                           #    2.934 GHz                    
+    34,912,998,062      instructions                     #    2.83  insn per cycle         
+       4.200192046 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  430) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.946352e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.082788e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.082788e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.989812e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.127480e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.127480e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.666951 sec
+TOTAL       :     3.616035 sec
 INFO: No Floating Point Exceptions have been reported
-    10,672,048,366      cycles                           #    2.906 GHz                    
-    23,002,190,728      instructions                     #    2.16  insn per cycle         
-       3.672904066 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2339) (avx2:    0) (512y:    0) (512z:    0)
+    10,626,604,482      cycles                           #    2.935 GHz                    
+    23,338,496,545      instructions                     #    2.20  insn per cycle         
+       3.621790672 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2378) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.911282e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.277398e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.277398e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.054894e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.447738e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.447738e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.242288 sec
+TOTAL       :     2.181056 sec
 INFO: No Floating Point Exceptions have been reported
-     6,169,794,712      cycles                           #    2.745 GHz                    
-    11,950,055,335      instructions                     #    1.94  insn per cycle         
-       2.248190170 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2484) (512y:    0) (512z:    0)
+     6,051,059,717      cycles                           #    2.768 GHz                    
+    11,860,809,289      instructions                     #    1.96  insn per cycle         
+       2.186772408 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2468) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.019160e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.398604e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.398604e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.028106e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.414371e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.414371e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.196254 sec
+TOTAL       :     2.192869 sec
 INFO: No Floating Point Exceptions have been reported
-     6,049,955,238      cycles                           #    2.748 GHz                    
-    11,121,316,368      instructions                     #    1.84  insn per cycle         
-       2.202221657 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2095) (512y:  174) (512z:    0)
+     6,064,121,206      cycles                           #    2.759 GHz                    
+    11,098,432,522      instructions                     #    1.83  insn per cycle         
+       2.198761953 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2098) (512y:  174) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.772228e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.982193e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.982193e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.876416e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.107845e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.107845e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.886677 sec
+TOTAL       :     2.813537 sec
 INFO: No Floating Point Exceptions have been reported
-     5,316,806,565      cycles                           #    1.839 GHz                    
-     9,020,560,556      instructions                     #    1.70  insn per cycle         
-       2.892692780 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1620) (512y:  208) (512z: 1570)
+     5,237,838,464      cycles                           #    1.858 GHz                    
+     9,015,066,552      instructions                     #    1.72  insn per cycle         
+       2.819357375 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1632) (512y:  208) (512z: 1567)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
index f0c0458a56..0cce370026 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 54s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_22:49:12
+DATE: 2024-09-18_12:12:25
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.071799e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.189965e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.392859e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.285654e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.744544e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.855248e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
-TOTAL       :     0.480577 sec
+TOTAL       :     0.489810 sec
 INFO: No Floating Point Exceptions have been reported
-     2,038,597,081      cycles                           #    2.857 GHz                    
-     2,938,312,083      instructions                     #    1.44  insn per cycle         
-       0.770278973 seconds time elapsed
+     2,058,086,051      cycles                           #    2.871 GHz                    
+     2,937,778,801      instructions                     #    1.43  insn per cycle         
+       0.774500335 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.915621e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.969220e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.969220e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.924099e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.978298e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.978298e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     5.557589 sec
+TOTAL       :     5.535123 sec
 INFO: No Floating Point Exceptions have been reported
-    16,233,982,752      cycles                           #    2.919 GHz                    
-    45,332,344,917      instructions                     #    2.79  insn per cycle         
-       5.562666162 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  592) (avx2:    0) (512y:    0) (512z:    0)
+    16,260,554,497      cycles                           #    2.935 GHz                    
+    45,332,637,380      instructions                     #    2.79  insn per cycle         
+       5.540566072 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  591) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.521998e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.857013e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.857013e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.537932e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.874791e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.874791e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.403382 sec
+TOTAL       :     2.397466 sec
 INFO: No Floating Point Exceptions have been reported
-     7,048,724,233      cycles                           #    2.928 GHz                    
-    17,767,905,011      instructions                     #    2.52  insn per cycle         
-       2.408503086 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2:    0) (512y:    0) (512z:    0)
+     7,088,165,806      cycles                           #    2.951 GHz                    
+    17,790,594,363      instructions                     #    2.51  insn per cycle         
+       2.403188687 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.219776e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.310619e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.310619e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.392634e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.540507e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.540507e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.360374 sec
+TOTAL       :     1.334609 sec
 INFO: No Floating Point Exceptions have been reported
-     3,741,111,926      cycles                           #    2.741 GHz                    
-     8,257,978,042      instructions                     #    2.21  insn per cycle         
-       1.365656837 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3350) (512y:    0) (512z:    0)
+     3,736,094,091      cycles                           #    2.789 GHz                    
+     8,261,313,611      instructions                     #    2.21  insn per cycle         
+       1.340132908 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3367) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.800434e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.005315e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.005315e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.862239e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.012505e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.012505e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.275551 sec
+TOTAL       :     1.268798 sec
 INFO: No Floating Point Exceptions have been reported
-     3,534,104,068      cycles                           #    2.761 GHz                    
-     7,913,123,613      instructions                     #    2.24  insn per cycle         
-       1.280701746 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3196) (512y:   20) (512z:    0)
+     3,543,869,427      cycles                           #    2.783 GHz                    
+     7,911,503,214      instructions                     #    2.23  insn per cycle         
+       1.274261347 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3209) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.455759e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.102326e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.102326e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.491068e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.141806e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.141806e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.709162 sec
+TOTAL       :     1.701721 sec
 INFO: No Floating Point Exceptions have been reported
-     3,253,158,863      cycles                           #    1.899 GHz                    
-     6,093,971,876      instructions                     #    1.87  insn per cycle         
-       1.714418078 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2287) (512y:   24) (512z: 2153)
+     3,270,419,298      cycles                           #    1.917 GHz                    
+     6,095,745,028      instructions                     #    1.86  insn per cycle         
+       1.707211646 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2251) (512y:   22) (512z: 2155)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt
index 8326a87b8d..5e7502fc17 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 04s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:28:36
+DATE: 2024-09-18_12:53:26
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,26 +53,30 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.951895e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.416585e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.416585e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.022210e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.414163e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.414163e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086805e+00 +- 3.414078e-03 )  GeV^0
-TOTAL       :     0.676117 sec
+TOTAL       :     0.683925 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,630,316,460      cycles                           #    2.880 GHz                    
-     4,097,364,848      instructions                     #    1.56  insn per cycle         
-       0.971302668 seconds time elapsed
+     2,644,974,332      cycles                           #    2.886 GHz                    
+     4,089,078,726      instructions                     #    1.55  insn per cycle         
+       0.974029218 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288)
 WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288)
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -95,20 +95,24 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.896591e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.949047e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.949047e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.927229e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.981708e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.981708e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     5.656810 sec
+TOTAL       :     5.572760 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    16,419,925,580      cycles                           #    2.900 GHz                    
-    45,375,599,039      instructions                     #    2.76  insn per cycle         
-       5.663172482 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  592) (avx2:    0) (512y:    0) (512z:    0)
+    16,435,796,229      cycles                           #    2.946 GHz                    
+    45,376,812,282      instructions                     #    2.76  insn per cycle         
+       5.580128034 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  591) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -125,20 +129,24 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.458760e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.790118e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.790118e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.483217e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.814609e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.814609e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.482654 sec
+TOTAL       :     2.475211 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     7,237,921,119      cycles                           #    2.909 GHz                    
-    18,048,078,725      instructions                     #    2.49  insn per cycle         
-       2.488857904 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2:    0) (512y:    0) (512z:    0)
+     7,297,746,086      cycles                           #    2.941 GHz                    
+    18,073,033,530      instructions                     #    2.48  insn per cycle         
+       2.482430942 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -155,20 +163,24 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.197371e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.278269e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.278269e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.199525e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.300829e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.300829e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.409020 sec
+TOTAL       :     1.415446 sec
 INFO: No Floating Point Exceptions have been reported
-     3,922,879,812      cycles                           #    2.773 GHz                    
-     8,495,549,912      instructions                     #    2.17  insn per cycle         
-       1.415323468 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3350) (512y:    0) (512z:    0)
+INFO: No Floating Point Exceptions have been reported
+     3,953,896,804      cycles                           #    2.781 GHz                    
+     8,500,905,843      instructions                     #    2.15  insn per cycle         
+       1.422523843 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3367) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -185,20 +197,24 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.667680e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.889347e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.889347e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.608107e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.919736e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.919736e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.341925 sec
+TOTAL       :     1.364816 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     3,729,600,082      cycles                           #    2.768 GHz                    
-     8,150,649,069      instructions                     #    2.19  insn per cycle         
-       1.348181911 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3196) (512y:   20) (512z:    0)
+     3,828,677,143      cycles                           #    2.793 GHz                    
+     8,155,232,689      instructions                     #    2.13  insn per cycle         
+       1.371531073 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3209) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -215,20 +231,24 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.360708e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.989495e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.989495e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.398900e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.033073e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.033073e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.780482 sec
+TOTAL       :     1.777483 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     3,455,332,957      cycles                           #    1.935 GHz                    
-     6,348,114,195      instructions                     #    1.84  insn per cycle         
-       1.786742043 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2287) (512y:   24) (512z: 2153)
+     3,485,580,348      cycles                           #    1.954 GHz                    
+     6,352,386,091      instructions                     #    1.82  insn per cycle         
+       1.784705241 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2251) (512y:   22) (512z: 2155)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt
index 71fa71b4dc..7b3bdcf221 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 00s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:40:12
+DATE: 2024-09-18_13:05:38
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.068568e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.187560e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.391526e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.256953e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.707995e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.827629e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079446e+00 +- 3.403306e-03 )  GeV^0
-TOTAL       :     0.573943 sec
+TOTAL       :     0.581626 sec
 INFO: No Floating Point Exceptions have been reported
-     2,277,708,206      cycles                           #    2.874 GHz                    
-     3,358,053,416      instructions                     #    1.47  insn per cycle         
-       0.850275167 seconds time elapsed
+     2,320,591,922      cycles                           #    2.873 GHz                    
+     3,370,044,879      instructions                     #    1.45  insn per cycle         
+       0.865525838 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.917195e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.972239e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.972239e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.923451e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.977569e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.977569e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079573e+00 +- 3.404712e-03 )  GeV^0
-TOTAL       :     5.612722 sec
+TOTAL       :     5.596105 sec
 INFO: No Floating Point Exceptions have been reported
-    16,407,033,244      cycles                           #    2.921 GHz                    
-    45,362,251,036      instructions                     #    2.76  insn per cycle         
-       5.617968376 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  592) (avx2:    0) (512y:    0) (512z:    0)
+    16,423,082,806      cycles                           #    2.932 GHz                    
+    45,361,162,230      instructions                     #    2.76  insn per cycle         
+       5.601871750 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  591) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.525760e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.861516e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.861516e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.510624e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.845954e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.845954e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079572e+00 +- 3.404712e-03 )  GeV^0
-TOTAL       :     2.459117 sec
+TOTAL       :     2.467999 sec
 INFO: No Floating Point Exceptions have been reported
-     7,212,908,479      cycles                           #    2.929 GHz                    
-    17,780,160,206      instructions                     #    2.47  insn per cycle         
-       2.464386875 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2:    0) (512y:    0) (512z:    0)
+     7,259,263,758      cycles                           #    2.936 GHz                    
+    17,804,964,488      instructions                     #    2.45  insn per cycle         
+       2.473643333 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.204922e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.294906e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.294906e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.271097e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.411462e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.411462e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079550e+00 +- 3.404207e-03 )  GeV^0
-TOTAL       :     1.419201 sec
+TOTAL       :     1.412437 sec
 INFO: No Floating Point Exceptions have been reported
-     3,902,872,992      cycles                           #    2.742 GHz                    
-     8,243,358,841      instructions                     #    2.11  insn per cycle         
-       1.424422244 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3350) (512y:    0) (512z:    0)
+     3,908,301,423      cycles                           #    2.757 GHz                    
+     8,246,550,739      instructions                     #    2.11  insn per cycle         
+       1.418307229 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3367) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.783463e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.004663e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.004663e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.660773e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.926852e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.926852e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079550e+00 +- 3.404207e-03 )  GeV^0
-TOTAL       :     1.336432 sec
+TOTAL       :     1.356729 sec
 INFO: No Floating Point Exceptions have been reported
-     3,705,633,949      cycles                           #    2.764 GHz                    
-     7,864,024,156      instructions                     #    2.12  insn per cycle         
-       1.341589225 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3196) (512y:   20) (512z:    0)
+     3,755,878,291      cycles                           #    2.759 GHz                    
+     7,864,539,547      instructions                     #    2.09  insn per cycle         
+       1.362169016 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3209) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.472436e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.118654e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.118654e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.442823e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.089629e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.089629e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.079550e+00 +- 3.404208e-03 )  GeV^0
-TOTAL       :     1.763167 sec
+TOTAL       :     1.773182 sec
 INFO: No Floating Point Exceptions have been reported
-     3,422,334,381      cycles                           #    1.936 GHz                    
-     6,043,294,482      instructions                     #    1.77  insn per cycle         
-       1.768435505 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2287) (512y:   24) (512z: 2153)
+     3,435,797,893      cycles                           #    1.932 GHz                    
+     6,046,565,657      instructions                     #    1.76  insn per cycle         
+       1.778888357 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2251) (512y:   22) (512z: 2155)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt
index 0d8155a7b6..423fac7e32 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 01s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:37:22
+DATE: 2024-09-18_13:02:47
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.074442e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.188782e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.393838e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.120397e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.694282e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.817714e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
-TOTAL       :     0.517208 sec
+TOTAL       :     0.524521 sec
 INFO: No Floating Point Exceptions have been reported
-     2,103,227,271      cycles                           #    2.862 GHz                    
-     3,266,134,218      instructions                     #    1.55  insn per cycle         
-       0.793508043 seconds time elapsed
+     2,152,741,046      cycles                           #    2.863 GHz                    
+     3,343,842,138      instructions                     #    1.55  insn per cycle         
+       0.808879979 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.912478e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.965888e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.965888e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.913977e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.967735e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.967735e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     5.567336 sec
+TOTAL       :     5.564436 sec
 INFO: No Floating Point Exceptions have been reported
-    16,244,625,996      cycles                           #    2.916 GHz                    
-    45,334,027,591      instructions                     #    2.79  insn per cycle         
-       5.572437548 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  592) (avx2:    0) (512y:    0) (512z:    0)
+    16,258,370,325      cycles                           #    2.920 GHz                    
+    45,334,605,627      instructions                     #    2.79  insn per cycle         
+       5.570118312 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  591) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.519328e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.854934e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.854934e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.499484e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.839377e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.839377e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.405432 sec
+TOTAL       :     2.417592 sec
 INFO: No Floating Point Exceptions have been reported
-     7,053,566,121      cycles                           #    2.927 GHz                    
-    17,767,653,624      instructions                     #    2.52  insn per cycle         
-       2.410646458 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2:    0) (512y:    0) (512z:    0)
+     7,096,627,782      cycles                           #    2.930 GHz                    
+    17,791,937,206      instructions                     #    2.51  insn per cycle         
+       2.423399622 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.279648e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.384307e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.384307e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.172278e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.285532e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.285532e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.350250 sec
+TOTAL       :     1.370207 sec
 INFO: No Floating Point Exceptions have been reported
-     3,726,374,980      cycles                           #    2.751 GHz                    
-     8,258,716,755      instructions                     #    2.22  insn per cycle         
-       1.355419801 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3350) (512y:    0) (512z:    0)
+     3,756,028,310      cycles                           #    2.731 GHz                    
+     8,261,729,651      instructions                     #    2.20  insn per cycle         
+       1.376013238 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3367) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.799998e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.006589e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.006589e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.782108e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.008509e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.008509e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.276520 sec
+TOTAL       :     1.281162 sec
 INFO: No Floating Point Exceptions have been reported
-     3,537,758,951      cycles                           #    2.762 GHz                    
-     7,913,131,472      instructions                     #    2.24  insn per cycle         
-       1.281778078 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3196) (512y:   20) (512z:    0)
+     3,565,563,130      cycles                           #    2.772 GHz                    
+     7,911,366,462      instructions                     #    2.22  insn per cycle         
+       1.287010007 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3209) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.474459e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.121773e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.121773e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.473993e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.131270e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.131270e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.704531 sec
+TOTAL       :     1.706601 sec
 INFO: No Floating Point Exceptions have been reported
-     3,253,759,575      cycles                           #    1.904 GHz                    
-     6,092,424,314      instructions                     #    1.87  insn per cycle         
-       1.709851210 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2287) (512y:   24) (512z: 2153)
+     3,274,416,803      cycles                           #    1.913 GHz                    
+     6,096,966,978      instructions                     #    1.86  insn per cycle         
+       1.712593107 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2251) (512y:   22) (512z: 2155)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt
index f704621ca8..8649c65a6a 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 00s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:34:36
+DATE: 2024-09-18_13:00:00
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -54,23 +50,26 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.623564e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.182304e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.386853e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.491816e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.706264e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.829121e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086805e+00 +- 3.414078e-03 )  GeV^0
-TOTAL       :     0.621738 sec
+TOTAL       :     0.638763 sec
 INFO: No Floating Point Exceptions have been reported
-     2,430,258,613      cycles                           #    2.875 GHz                    
-     3,766,141,245      instructions                     #    1.55  insn per cycle         
-       0.902091417 seconds time elapsed
+     2,486,309,752      cycles                           #    2.846 GHz                    
+     3,832,853,586      instructions                     #    1.54  insn per cycle         
+       0.933212094 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -88,20 +87,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.919517e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.974180e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.974180e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.926255e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.980755e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.980755e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     5.547153 sec
+TOTAL       :     5.530359 sec
 INFO: No Floating Point Exceptions have been reported
-    16,239,383,395      cycles                           #    2.925 GHz                    
-    45,332,563,163      instructions                     #    2.79  insn per cycle         
-       5.552300455 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  592) (avx2:    0) (512y:    0) (512z:    0)
+    16,260,744,493      cycles                           #    2.938 GHz                    
+    45,331,881,354      instructions                     #    2.79  insn per cycle         
+       5.536242796 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  591) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -117,20 +119,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.514723e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.851864e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.851864e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.514119e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.847574e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.847574e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.407844 sec
+TOTAL       :     2.409823 sec
 INFO: No Floating Point Exceptions have been reported
-     7,048,396,058      cycles                           #    2.922 GHz                    
-    17,769,003,065      instructions                     #    2.52  insn per cycle         
-       2.412983960 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2:    0) (512y:    0) (512z:    0)
+     7,091,224,967      cycles                           #    2.937 GHz                    
+    17,790,807,442      instructions                     #    2.51  insn per cycle         
+       2.415653910 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -146,20 +151,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.312587e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.437297e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.437297e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.315319e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.466327e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.466327e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.346063 sec
+TOTAL       :     1.347295 sec
 INFO: No Floating Point Exceptions have been reported
-     3,728,179,537      cycles                           #    2.761 GHz                    
-     8,258,013,414      instructions                     #    2.22  insn per cycle         
-       1.351324909 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3350) (512y:    0) (512z:    0)
+     3,748,135,716      cycles                           #    2.771 GHz                    
+     8,261,548,625      instructions                     #    2.20  insn per cycle         
+       1.353086220 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3367) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -175,20 +183,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.782227e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.003815e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.003815e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.772831e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.005617e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.005617e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.278814 sec
+TOTAL       :     1.281889 sec
 INFO: No Floating Point Exceptions have been reported
-     3,538,647,518      cycles                           #    2.758 GHz                    
-     7,913,188,307      instructions                     #    2.24  insn per cycle         
-       1.283975197 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3196) (512y:   20) (512z:    0)
+     3,559,044,656      cycles                           #    2.766 GHz                    
+     7,911,466,674      instructions                     #    2.22  insn per cycle         
+       1.287610992 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3209) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +215,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.475607e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.120414e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.120414e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.412498e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.103906e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.103906e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.703723 sec
+TOTAL       :     1.722151 sec
 INFO: No Floating Point Exceptions have been reported
-     3,247,851,973      cycles                           #    1.901 GHz                    
-     6,092,721,375      instructions                     #    1.88  insn per cycle         
-       1.708887964 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2287) (512y:   24) (512z: 2153)
+     3,304,024,823      cycles                           #    1.914 GHz                    
+     6,099,911,719      instructions                     #    1.85  insn per cycle         
+       1.727529111 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2251) (512y:   22) (512z: 2155)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt
index 6b132c1b1f..fbbd4d7aad 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 50s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_22:49:32
+DATE: 2024-09-18_12:12:45
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.471980e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.448283e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.703783e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.269829e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.739721e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.856627e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
-TOTAL       :     0.483699 sec
+TOTAL       :     0.490633 sec
 INFO: No Floating Point Exceptions have been reported
-     2,013,562,134      cycles                           #    2.853 GHz                    
-     2,909,226,103      instructions                     #    1.44  insn per cycle         
-       0.764230645 seconds time elapsed
+     2,054,923,204      cycles                           #    2.868 GHz                    
+     2,821,409,154      instructions                     #    1.37  insn per cycle         
+       0.774891828 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 128
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 126
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.954648e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.010377e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.010377e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.972061e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.029032e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.029032e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     5.448737 sec
+TOTAL       :     5.402943 sec
 INFO: No Floating Point Exceptions have been reported
-    15,968,697,173      cycles                           #    2.929 GHz                    
-    44,442,467,905      instructions                     #    2.78  insn per cycle         
-       5.453950650 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  537) (avx2:    0) (512y:    0) (512z:    0)
+    15,938,200,378      cycles                           #    2.947 GHz                    
+    44,441,419,092      instructions                     #    2.79  insn per cycle         
+       5.408620560 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  536) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.288745e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.754008e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.754008e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.316687e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.790003e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.790003e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.067756 sec
+TOTAL       :     2.058425 sec
 INFO: No Floating Point Exceptions have been reported
-     6,064,224,873      cycles                           #    2.927 GHz                    
-    17,073,609,650      instructions                     #    2.82  insn per cycle         
-       2.072883041 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2864) (avx2:    0) (512y:    0) (512z:    0)
+     6,073,730,384      cycles                           #    2.944 GHz                    
+    17,080,831,031      instructions                     #    2.81  insn per cycle         
+       2.063919735 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.044707e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.619635e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.619635e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.040290e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.607212e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.607212e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.819445 sec
+TOTAL       :     1.821129 sec
 INFO: No Floating Point Exceptions have been reported
-     5,009,520,595      cycles                           #    2.747 GHz                    
-    10,222,223,141      instructions                     #    2.04  insn per cycle         
-       1.824644374 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3893) (512y:    0) (512z:    0)
+     5,028,060,974      cycles                           #    2.754 GHz                    
+    10,226,327,467      instructions                     #    2.03  insn per cycle         
+       1.826739648 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3907) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.086668e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.667060e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.667060e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.109926e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.690770e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.690770e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.808098 sec
+TOTAL       :     1.802334 sec
 INFO: No Floating Point Exceptions have been reported
-     4,954,174,809      cycles                           #    2.733 GHz                    
-     9,992,697,574      instructions                     #    2.02  insn per cycle         
-       1.813464382 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3794) (512y:    2) (512z:    0)
+     4,967,999,007      cycles                           #    2.749 GHz                    
+     9,996,248,012      instructions                     #    2.01  insn per cycle         
+       1.807786513 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3806) (512y:    2) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.638616e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.961119e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.961119e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.589191e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.908384e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.908384e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     2.344859 sec
+TOTAL       :     2.373161 sec
 INFO: No Floating Point Exceptions have been reported
-     4,359,058,163      cycles                           #    1.856 GHz                    
-     8,441,473,825      instructions                     #    1.94  insn per cycle         
-       2.350013338 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2784) (512y:    4) (512z: 2752)
+     4,379,373,712      cycles                           #    1.842 GHz                    
+     8,445,292,719      instructions                     #    1.93  insn per cycle         
+       2.379096717 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2746) (512y:    4) (512z: 2754)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt
index 67668751a3..b94de9fae6 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 31s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:20:00
+DATE: 2024-09-18_12:44:41
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.693096e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.181249e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.397047e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.109159e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.754036e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.870271e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
-TOTAL       :     0.488701 sec
+TOTAL       :     0.491488 sec
 INFO: No Floating Point Exceptions have been reported
-     2,026,366,666      cycles                           #    2.849 GHz                    
-     2,929,563,128      instructions                     #    1.45  insn per cycle         
-       0.770424636 seconds time elapsed
+     2,077,663,912      cycles                           #    2.873 GHz                    
+     2,918,599,943      instructions                     #    1.40  insn per cycle         
+       0.780254295 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.491467e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.583014e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.583014e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.511694e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.603843e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.603843e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     4.294115 sec
+TOTAL       :     4.261367 sec
 INFO: No Floating Point Exceptions have been reported
-    12,564,892,138      cycles                           #    2.923 GHz                    
-    34,594,410,065      instructions                     #    2.75  insn per cycle         
-       4.299702876 seconds time elapsed
+    12,578,636,437      cycles                           #    2.949 GHz                    
+    34,608,642,396      instructions                     #    2.75  insn per cycle         
+       4.266948834 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  683) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,27 +117,30 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.316170e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.785295e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.785295e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.245729e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.707902e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.707902e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.059154 sec
+TOTAL       :     2.085904 sec
 INFO: No Floating Point Exceptions have been reported
-     6,055,050,463      cycles                           #    2.934 GHz                    
-    14,843,315,905      instructions                     #    2.45  insn per cycle         
-       2.064883066 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2980) (avx2:    0) (512y:    0) (512z:    0)
+     6,139,006,311      cycles                           #    2.936 GHz                    
+    14,814,345,795      instructions                     #    2.41  insn per cycle         
+       2.091585873 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2975) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++)    = 2.028819e+00
-Avg ME (F77/C++)    = 2.0288193414453417
-Relative difference = 1.6829758681196702e-07
+Avg ME (F77/C++)    = 2.0288193755550310
+Relative difference = 1.8511017053446366e-07
 OK (relative difference <= 5E-3)
 =========================================================================
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP=
@@ -144,27 +149,30 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.125837e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.942815e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.942815e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.217326e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.053698e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.053698e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.556816 sec
+TOTAL       :     1.539150 sec
 INFO: No Floating Point Exceptions have been reported
-     4,301,919,832      cycles                           #    2.755 GHz                    
-     9,049,103,047      instructions                     #    2.10  insn per cycle         
-       1.562522898 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4446) (512y:    0) (512z:    0)
+     4,266,849,527      cycles                           #    2.764 GHz                    
+     9,068,527,132      instructions                     #    2.13  insn per cycle         
+       1.544604329 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4456) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++)    = 2.028818e+00
-Avg ME (F77/C++)    = 2.0288181974319741
-Relative difference = 9.731379272303266e-08
+Avg ME (F77/C++)    = 2.0288182069780305
+Relative difference = 1.0201902325125583e-07
 OK (relative difference <= 5E-3)
 =========================================================================
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP=
@@ -173,27 +181,30 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.332880e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.184727e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.184727e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.341390e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.190395e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.190395e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.516476 sec
+TOTAL       :     1.513835 sec
 INFO: No Floating Point Exceptions have been reported
-     4,186,126,979      cycles                           #    2.752 GHz                    
-     8,657,136,628      instructions                     #    2.07  insn per cycle         
-       1.522018326 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4213) (512y:    0) (512z:    0)
+     4,209,677,652      cycles                           #    2.772 GHz                    
+     8,658,962,407      instructions                     #    2.06  insn per cycle         
+       1.519314933 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4233) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++)    = 2.028818e+00
-Avg ME (F77/C++)    = 2.0288181974319741
-Relative difference = 9.731379272303266e-08
+Avg ME (F77/C++)    = 2.0288182069780305
+Relative difference = 1.0201902325125583e-07
 OK (relative difference <= 5E-3)
 =========================================================================
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP=
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.348540e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.787233e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.787233e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.363197e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.802509e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.802509e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     2.045299 sec
+TOTAL       :     2.041800 sec
 INFO: No Floating Point Exceptions have been reported
-     3,828,361,693      cycles                           #    1.868 GHz                    
-     7,802,705,560      instructions                     #    2.04  insn per cycle         
-       2.050733542 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4252) (512y:    0) (512z: 2556)
+     3,848,539,052      cycles                           #    1.880 GHz                    
+     7,805,686,420      instructions                     #    2.03  insn per cycle         
+       2.047559874 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4273) (512y:    0) (512z: 2558)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt
index 25cb308f89..647db6d470 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 30s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_23:20:20
+DATE: 2024-09-18_12:45:01
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.117401e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.441872e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.719277e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.181121e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.754734e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.875454e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086719e+00 +- 3.413389e-03 )  GeV^0
-TOTAL       :     0.484318 sec
+TOTAL       :     0.493098 sec
 INFO: No Floating Point Exceptions have been reported
-     2,027,044,969      cycles                           #    2.865 GHz                    
-     2,915,708,806      instructions                     #    1.44  insn per cycle         
-       0.765596160 seconds time elapsed
+     2,068,560,161      cycles                           #    2.866 GHz                    
+     2,913,404,401      instructions                     #    1.41  insn per cycle         
+       0.778994585 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 128
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 126
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.603125e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.703630e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.703630e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.673571e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.779018e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.779018e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086780e+00 +- 3.413794e-03 )  GeV^0
-TOTAL       :     4.113547 sec
+TOTAL       :     4.007457 sec
 INFO: No Floating Point Exceptions have been reported
-    11,856,983,433      cycles                           #    2.879 GHz                    
-    35,064,975,773      instructions                     #    2.96  insn per cycle         
-       4.119151430 seconds time elapsed
+    11,821,622,506      cycles                           #    2.947 GHz                    
+    35,077,213,703      instructions                     #    2.97  insn per cycle         
+       4.012923546 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  453) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.399477e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.883757e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.883757e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.446071e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.947640e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.947640e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086779e+00 +- 3.413793e-03 )  GeV^0
-TOTAL       :     2.027290 sec
+TOTAL       :     2.011399 sec
 INFO: No Floating Point Exceptions have been reported
-     5,947,707,323      cycles                           #    2.927 GHz                    
-    14,463,680,627      instructions                     #    2.43  insn per cycle         
-       2.032751601 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2559) (avx2:    0) (512y:    0) (512z:    0)
+     5,918,531,500      cycles                           #    2.935 GHz                    
+    14,532,054,201      instructions                     #    2.46  insn per cycle         
+       2.017166521 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2569) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,27 +149,30 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.403034e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.277469e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.277469e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.388337e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.293979e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.293979e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.501309 sec
+TOTAL       :     1.508049 sec
 INFO: No Floating Point Exceptions have been reported
-     4,151,474,281      cycles                           #    2.757 GHz                    
-     8,876,254,548      instructions                     #    2.14  insn per cycle         
-       1.506811786 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3556) (512y:    0) (512z:    0)
+     4,192,067,529      cycles                           #    2.771 GHz                    
+     8,850,538,175      instructions                     #    2.11  insn per cycle         
+       1.513555792 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3552) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++)    = 2.028818e+00
-Avg ME (F77/C++)    = 2.0288182104704902
-Relative difference = 1.0374044905426431e-07
+Avg ME (F77/C++)    = 2.0288182107033208
+Relative difference = 1.0385521077446488e-07
 OK (relative difference <= 5E-3)
 =========================================================================
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP=
@@ -173,27 +181,30 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.465468e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.351611e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.351611e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.539896e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.448863e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.448863e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     1.489055 sec
+TOTAL       :     1.476824 sec
 INFO: No Floating Point Exceptions have been reported
-     4,126,432,660      cycles                           #    2.763 GHz                    
-     8,402,940,741      instructions                     #    2.04  insn per cycle         
-       1.494518525 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3284) (512y:    0) (512z:    0)
+     4,124,218,335      cycles                           #    2.783 GHz                    
+     8,408,510,612      instructions                     #    2.04  insn per cycle         
+       1.482399691 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3296) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++)    = 2.028818e+00
-Avg ME (F77/C++)    = 2.0288182104704902
-Relative difference = 1.0374044905426431e-07
+Avg ME (F77/C++)    = 2.0288182107033208
+Relative difference = 1.0385521077446488e-07
 OK (relative difference <= 5E-3)
 =========================================================================
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP=
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.443520e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.895951e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.895951e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.510377e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.974414e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.974414e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086810e+00 +- 3.414231e-03 )  GeV^0
-TOTAL       :     2.012161 sec
+TOTAL       :     1.989253 sec
 INFO: No Floating Point Exceptions have been reported
-     3,781,259,830      cycles                           #    1.876 GHz                    
-     7,694,397,332      instructions                     #    2.03  insn per cycle         
-       2.017683593 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3268) (512y:    0) (512z: 2108)
+     3,785,582,278      cycles                           #    1.899 GHz                    
+     7,698,584,647      instructions                     #    2.03  insn per cycle         
+       1.994773359 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3289) (512y:    0) (512z: 2110)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
index 348de144bc..ac99bf7b60 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 55s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_22:49:54
+DATE: 2024-09-18_12:13:07
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.721947e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.164266e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.274422e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.415407e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.358342e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.002564e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.527209 sec
+TOTAL       :     0.531223 sec
 INFO: No Floating Point Exceptions have been reported
-     2,174,329,993      cycles                           #    2.863 GHz                    
-     3,151,780,179      instructions                     #    1.45  insn per cycle         
-       0.816795416 seconds time elapsed
+     2,212,715,399      cycles                           #    2.883 GHz                    
+     3,174,354,481      instructions                     #    1.43  insn per cycle         
+       0.824625337 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.792613e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.837824e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.837824e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.812224e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.858502e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.858502e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.955327 sec
+TOTAL       :     5.893700 sec
 INFO: No Floating Point Exceptions have been reported
-    17,397,526,002      cycles                           #    2.920 GHz                    
-    46,091,587,032      instructions                     #    2.65  insn per cycle         
-       5.960944985 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  618) (avx2:    0) (512y:    0) (512z:    0)
+    17,384,515,155      cycles                           #    2.947 GHz                    
+    46,085,827,160      instructions                     #    2.65  insn per cycle         
+       5.899425018 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  617) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.176482e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.332914e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.332914e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.230178e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.393068e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.393068e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.408214 sec
+TOTAL       :     3.353996 sec
 INFO: No Floating Point Exceptions have been reported
-     9,950,593,950      cycles                           #    2.916 GHz                    
-    27,591,843,158      instructions                     #    2.77  insn per cycle         
-       3.413688048 seconds time elapsed
+     9,906,776,741      cycles                           #    2.949 GHz                    
+    27,581,204,322      instructions                     #    2.78  insn per cycle         
+       3.359750594 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.032838e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.417454e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.417454e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.070803e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.467527e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.467527e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.190453 sec
+TOTAL       :     2.175848 sec
 INFO: No Floating Point Exceptions have been reported
-     6,006,383,686      cycles                           #    2.736 GHz                    
-    12,480,601,360      instructions                     #    2.08  insn per cycle         
-       2.196062875 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2762) (512y:    0) (512z:    0)
+     6,033,401,789      cycles                           #    2.767 GHz                    
+    12,481,778,172      instructions                     #    2.07  insn per cycle         
+       2.181604261 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2773) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.546431e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.015514e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.015514e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.576261e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.054850e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.054850e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     1.997058 sec
+TOTAL       :     1.987931 sec
 INFO: No Floating Point Exceptions have been reported
-     5,515,075,827      cycles                           #    2.755 GHz                    
-    11,914,073,909      instructions                     #    2.16  insn per cycle         
-       2.002560475 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2507) (512y:  146) (512z:    0)
+     5,526,359,959      cycles                           #    2.773 GHz                    
+    11,919,157,674      instructions                     #    2.16  insn per cycle         
+       1.993761374 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2518) (512y:  146) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.568540e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.754917e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.754917e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.583667e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.773486e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.773486e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.044039 sec
+TOTAL       :     3.034227 sec
 INFO: No Floating Point Exceptions have been reported
-     5,590,219,431      cycles                           #    1.834 GHz                    
-     8,106,058,271      instructions                     #    1.45  insn per cycle         
-       3.049627104 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1646) (512y:  126) (512z: 1862)
+     5,618,120,727      cycles                           #    1.849 GHz                    
+     8,105,692,593      instructions                     #    1.44  insn per cycle         
+       3.040009315 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1671) (512y:  126) (512z: 1862)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt
index 57c5b89892..d60a3db604 100644
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 49s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-29_22:50:19
+DATE: 2024-09-18_12:13:32
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.735697e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.172278e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.284391e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.391860e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.272095e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.937370e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     0.527511 sec
+TOTAL       :     0.534370 sec
 INFO: No Floating Point Exceptions have been reported
-     2,171,700,119      cycles                           #    2.860 GHz                    
-     3,131,301,039      instructions                     #    1.44  insn per cycle         
-       0.816870399 seconds time elapsed
+     2,212,045,639      cycles                           #    2.882 GHz                    
+     3,154,512,029      instructions                     #    1.43  insn per cycle         
+       0.826500836 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 208
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 212
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.846511e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.895008e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.895008e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.857330e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.905433e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.905433e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     5.785374 sec
+TOTAL       :     5.752139 sec
 INFO: No Floating Point Exceptions have been reported
-    16,941,917,135      cycles                           #    2.926 GHz                    
-    45,116,651,747      instructions                     #    2.66  insn per cycle         
-       5.790862754 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  569) (avx2:    0) (512y:    0) (512z:    0)
+    16,956,103,485      cycles                           #    2.946 GHz                    
+    45,111,671,387      instructions                     #    2.66  insn per cycle         
+       5.757950281 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  568) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.350385e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.523191e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.523191e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.369201e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.545470e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.545470e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.236469 sec
+TOTAL       :     3.220654 sec
 INFO: No Floating Point Exceptions have been reported
-     9,506,236,409      cycles                           #    2.933 GHz                    
-    26,243,746,265      instructions                     #    2.76  insn per cycle         
-       3.242056418 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2385) (avx2:    0) (512y:    0) (512z:    0)
+     9,518,675,134      cycles                           #    2.951 GHz                    
+    26,252,301,051      instructions                     #    2.76  insn per cycle         
+       3.226704286 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.486304e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.788448e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.788448e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.516544e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.830416e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.830416e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.442979 sec
+TOTAL       :     2.429009 sec
 INFO: No Floating Point Exceptions have been reported
-     6,728,910,073      cycles                           #    2.749 GHz                    
-    14,027,146,476      instructions                     #    2.08  insn per cycle         
-       2.448326705 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2884) (512y:    0) (512z:    0)
+     6,737,120,781      cycles                           #    2.769 GHz                    
+    14,029,549,404      instructions                     #    2.08  insn per cycle         
+       2.434732608 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2896) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.752703e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.089158e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.089158e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.763106e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.113046e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.113046e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     2.312345 sec
+TOTAL       :     2.308488 sec
 INFO: No Floating Point Exceptions have been reported
-     6,377,970,315      cycles                           #    2.752 GHz                    
-    13,518,991,158      instructions                     #    2.12  insn per cycle         
-       2.317849497 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2523) (512y:  302) (512z:    0)
+     6,400,709,122      cycles                           #    2.767 GHz                    
+    13,521,645,446      instructions                     #    2.11  insn per cycle         
+       2.314138282 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2535) (512y:  302) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.573013e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.761275e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.761275e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.631126e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.827064e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.827064e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.086689e+00 +- 3.413217e-03 )  GeV^0
-TOTAL       :     3.040837 sec
+TOTAL       :     2.996339 sec
 INFO: No Floating Point Exceptions have been reported
-     5,577,789,241      cycles                           #    1.832 GHz                    
-     9,204,522,980      instructions                     #    1.65  insn per cycle         
-       3.046305567 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1431) (512y:  212) (512z: 2059)
+     5,581,413,243      cycles                           #    1.860 GHz                    
+     9,205,937,992      instructions                     #    1.65  insn per cycle         
+       3.002095742 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1456) (512y:  212) (512z: 2060)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
index 07a9da8cf2..de5eca26a8 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 05s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-29_22:50:44
+DATE: 2024-09-18_12:13:57
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.768055e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.041158e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.054837e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.672201e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.887935e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.992853e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     0.468421 sec
+TOTAL       :     0.477007 sec
 INFO: No Floating Point Exceptions have been reported
-     1,983,649,354      cycles                           #    2.860 GHz                    
-     2,849,635,281      instructions                     #    1.44  insn per cycle         
-       0.752061218 seconds time elapsed
+     1,987,624,447      cycles                           #    2.874 GHz                    
+     2,861,967,134      instructions                     #    1.44  insn per cycle         
+       0.751704376 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.093113e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.321988e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.335513e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.044656e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.231568e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.242034e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
-TOTAL       :     0.607399 sec
+TOTAL       :     0.614374 sec
 INFO: No Floating Point Exceptions have been reported
-     2,417,979,870      cycles                           #    2.867 GHz                    
-     3,633,633,356      instructions                     #    1.50  insn per cycle         
-       0.902950603 seconds time elapsed
+     2,464,089,898      cycles                           #    2.883 GHz                    
+     3,693,413,015      instructions                     #    1.50  insn per cycle         
+       0.914175309 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.416355e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.428534e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.428534e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.435389e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.447579e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.447579e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     6.801356 sec
+TOTAL       :     6.748690 sec
 INFO: No Floating Point Exceptions have been reported
-    19,904,115,348      cycles                           #    2.925 GHz                    
-    59,914,819,169      instructions                     #    3.01  insn per cycle         
-       6.805500502 seconds time elapsed
+    19,905,580,584      cycles                           #    2.948 GHz                    
+    59,914,464,179      instructions                     #    3.01  insn per cycle         
+       6.753011110 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.569173e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.611795e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.611795e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.605126e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.648126e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.648126e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     3.605969 sec
+TOTAL       :     3.577729 sec
 INFO: No Floating Point Exceptions have been reported
-    10,580,697,478      cycles                           #    2.932 GHz                    
-    31,088,445,704      instructions                     #    2.94  insn per cycle         
-       3.610227674 seconds time elapsed
+    10,567,541,735      cycles                           #    2.951 GHz                    
+    31,084,954,146      instructions                     #    2.94  insn per cycle         
+       3.582009862 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.052369e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.218380e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.218380e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.119843e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.286275e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.286275e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.830002 sec
+TOTAL       :     1.816628 sec
 INFO: No Floating Point Exceptions have been reported
-     5,001,433,513      cycles                           #    2.728 GHz                    
-    11,404,945,118      instructions                     #    2.28  insn per cycle         
-       1.834254964 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4635) (512y:    0) (512z:    0)
+     5,009,875,098      cycles                           #    2.752 GHz                    
+    11,404,863,740      instructions                     #    2.28  insn per cycle         
+       1.820981146 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4642) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.021999e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.042648e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.042648e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.027376e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.048667e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.048667e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.623312 sec
+TOTAL       :     1.614828 sec
 INFO: No Floating Point Exceptions have been reported
-     4,444,423,908      cycles                           #    2.732 GHz                    
-    10,663,668,834      instructions                     #    2.40  insn per cycle         
-       1.627519035 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4371) (512y:   91) (512z:    0)
+     4,447,516,452      cycles                           #    2.748 GHz                    
+    10,663,621,215      instructions                     #    2.40  insn per cycle         
+       1.619180273 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4378) (512y:   92) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.029000e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.130556e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.130556e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.153517e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.257338e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.257338e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     2.353437 sec
+TOTAL       :     2.312792 sec
 INFO: No Floating Point Exceptions have been reported
-     4,139,182,535      cycles                           #    1.756 GHz                    
-     5,966,452,479      instructions                     #    1.44  insn per cycle         
-       2.357699578 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1605) (512y:   95) (512z: 3576)
+     4,128,948,366      cycles                           #    1.783 GHz                    
+     5,970,641,302      instructions                     #    1.45  insn per cycle         
+       2.317202499 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1620) (512y:   94) (512z: 3577)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt
index 3b4a073e1f..9c43264546 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 14s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-29_23:28:57
+DATE: 2024-09-18_12:53:47
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,15 +53,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.524968e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.790522e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.790522e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.507916e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.178599e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.178599e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     0.498207 sec
+TOTAL       :     0.502648 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,034,885,127      cycles                           #    2.867 GHz                    
-     3,099,307,889      instructions                     #    1.52  insn per cycle         
-       0.766751153 seconds time elapsed
+     2,068,761,834      cycles                           #    2.877 GHz                    
+     3,090,755,102      instructions                     #    1.49  insn per cycle         
+       0.775689457 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -83,19 +80,23 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.638749e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.629765e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.629765e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.673734e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.373672e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.373672e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
-TOTAL       :     0.835698 sec
+TOTAL       :     0.833395 sec
 INFO: No Floating Point Exceptions have been reported
-     3,068,002,737      cycles                           #    2.826 GHz                    
-     4,965,731,705      instructions                     #    1.62  insn per cycle         
-       1.143737678 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+     3,144,886,808      cycles                           #    2.895 GHz                    
+     5,022,532,373      instructions                     #    1.60  insn per cycle         
+       1.144806482 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -114,20 +115,24 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.419094e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.431319e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.431319e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.430493e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.443016e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.443016e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     6.801659 sec
+TOTAL       :     6.770345 sec
 INFO: No Floating Point Exceptions have been reported
-    19,950,430,190      cycles                           #    2.932 GHz                    
-    59,924,799,433      instructions                     #    3.00  insn per cycle         
-       6.806231180 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+    19,935,799,744      cycles                           #    2.943 GHz                    
+    59,921,717,219      instructions                     #    3.01  insn per cycle         
+       6.775096176 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,24 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.563840e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.607229e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.607229e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.571029e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.615207e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.615207e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     3.618278 sec
+TOTAL       :     3.613337 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    10,621,531,461      cycles                           #    2.933 GHz                    
-    31,136,479,712      instructions                     #    2.93  insn per cycle         
-       3.622674421 seconds time elapsed
+    10,624,808,815      cycles                           #    2.938 GHz                    
+    31,136,068,452      instructions                     #    2.93  insn per cycle         
+       3.618153867 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -174,20 +183,24 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.968662e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.136792e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.136792e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.976002e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.144750e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.144750e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.855111 sec
+TOTAL       :     1.854528 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     5,050,268,142      cycles                           #    2.717 GHz                    
-    11,453,960,155      instructions                     #    2.27  insn per cycle         
-       1.859579114 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4635) (512y:    0) (512z:    0)
+     5,122,960,994      cycles                           #    2.757 GHz                    
+    11,456,752,385      instructions                     #    2.24  insn per cycle         
+       1.859209871 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4642) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +217,24 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.010354e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.031565e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.031565e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.023623e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.045107e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.045107e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.650652 sec
+TOTAL       :     1.629549 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     4,494,300,346      cycles                           #    2.716 GHz                    
-    10,714,275,385      instructions                     #    2.38  insn per cycle         
-       1.655223138 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4371) (512y:   91) (512z:    0)
+     4,493,284,400      cycles                           #    2.751 GHz                    
+    10,714,819,935      instructions                     #    2.38  insn per cycle         
+       1.634203375 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4378) (512y:   92) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -234,20 +251,24 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.026438e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.131749e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.131749e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.121040e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.229108e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.229108e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     2.362018 sec
+TOTAL       :     2.332216 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     4,180,671,804      cycles                           #    1.768 GHz                    
-     6,004,633,668      instructions                     #    1.44  insn per cycle         
-       2.366452325 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1605) (512y:   95) (512z: 3576)
+     4,174,771,858      cycles                           #    1.787 GHz                    
+     6,010,349,590      instructions                     #    1.44  insn per cycle         
+       2.336931936 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1620) (512y:   94) (512z: 3577)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt
index ec909fdebc..8cdcf50b56 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 50s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-29_22:51:10
+DATE: 2024-09-18_12:14:23
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.808286e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.043420e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.056801e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.625266e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.900146e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.003656e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     0.468123 sec
+TOTAL       :     0.475988 sec
 INFO: No Floating Point Exceptions have been reported
-     1,982,122,406      cycles                           #    2.866 GHz                    
-     2,840,505,864      instructions                     #    1.43  insn per cycle         
-       0.749220422 seconds time elapsed
+     1,977,726,110      cycles                           #    2.850 GHz                    
+     2,827,901,574      instructions                     #    1.43  insn per cycle         
+       0.751347521 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.087347e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.314577e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.327755e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.046576e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.234116e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.244538e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
-TOTAL       :     0.599896 sec
+TOTAL       :     0.611168 sec
 INFO: No Floating Point Exceptions have been reported
-     2,403,528,526      cycles                           #    2.872 GHz                    
-     3,686,433,169      instructions                     #    1.53  insn per cycle         
-       0.895610206 seconds time elapsed
+     2,454,142,578      cycles                           #    2.886 GHz                    
+     3,695,001,143      instructions                     #    1.51  insn per cycle         
+       0.909771724 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.407480e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.419549e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.419549e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.436838e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.448877e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.448877e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     6.825988 sec
+TOTAL       :     6.744113 sec
 INFO: No Floating Point Exceptions have been reported
-    19,902,263,810      cycles                           #    2.914 GHz                    
-    60,127,973,180      instructions                     #    3.02  insn per cycle         
-       6.830146319 seconds time elapsed
+    19,898,434,725      cycles                           #    2.949 GHz                    
+    60,128,447,647      instructions                     #    3.02  insn per cycle         
+       6.748351399 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.552176e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.594726e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.594726e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.649169e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.692956e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.692956e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     3.618973 sec
+TOTAL       :     3.544209 sec
 INFO: No Floating Point Exceptions have been reported
-    10,491,172,393      cycles                           #    2.896 GHz                    
-    30,689,021,169      instructions                     #    2.93  insn per cycle         
-       3.623315110 seconds time elapsed
+    10,481,283,758      cycles                           #    2.954 GHz                    
+    30,686,827,574      instructions                     #    2.93  insn per cycle         
+       3.548515404 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.798771e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.955422e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.955422e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.897572e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.058943e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.058943e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.882242 sec
+TOTAL       :     1.861619 sec
 INFO: No Floating Point Exceptions have been reported
-     5,138,339,484      cycles                           #    2.725 GHz                    
-    11,838,435,308      instructions                     #    2.30  insn per cycle         
-       1.886500452 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4741) (512y:    0) (512z:    0)
+     5,141,047,361      cycles                           #    2.756 GHz                    
+    11,838,355,420      instructions                     #    2.30  insn per cycle         
+       1.866119668 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4746) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.596264e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.781452e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.781452e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.640218e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.828831e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.828831e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.727060 sec
+TOTAL       :     1.719787 sec
 INFO: No Floating Point Exceptions have been reported
-     4,719,460,247      cycles                           #    2.727 GHz                    
-    11,163,955,032      instructions                     #    2.37  insn per cycle         
-       1.731396360 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4396) (512y:  245) (512z:    0)
+     4,732,734,719      cycles                           #    2.746 GHz                    
+    11,163,471,114      instructions                     #    2.36  insn per cycle         
+       1.724312193 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4403) (512y:  246) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.994756e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.093121e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.093121e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.072241e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.175446e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.175446e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     2.364545 sec
+TOTAL       :     2.339175 sec
 INFO: No Floating Point Exceptions have been reported
-     4,159,347,839      cycles                           #    1.757 GHz                    
-     6,218,332,377      instructions                     #    1.50  insn per cycle         
-       2.368956290 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1501) (512y:  140) (512z: 3678)
+     4,159,319,454      cycles                           #    1.776 GHz                    
+     6,222,343,045      instructions                     #    1.50  insn per cycle         
+       2.343565013 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1516) (512y:  139) (512z: 3679)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
index 730624ea47..b9aad18eeb 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 56s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-29_22:51:35
+DATE: 2024-09-18_12:14:49
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,17 +49,17 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.278215e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.944384e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.016796e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.682161e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.012912e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.052707e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008472e+02 +- 5.002447e+01 )  GeV^-2
-TOTAL       :     0.451173 sec
+TOTAL       :     0.457449 sec
 INFO: No Floating Point Exceptions have been reported
-     1,919,633,470      cycles                           #    2.857 GHz                    
-     2,728,963,073      instructions                     #    1.42  insn per cycle         
-       0.729048793 seconds time elapsed
+     1,934,954,114      cycles                           #    2.865 GHz                    
+     2,736,882,841      instructions                     #    1.41  insn per cycle         
+       0.732650423 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 227
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 226
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 .........................................................................
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP=
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.979292e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.903991e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.964886e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.683155e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.385425e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.426136e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.630097e+02 +- 4.770717e+02 )  GeV^-2
-TOTAL       :     0.499205 sec
+TOTAL       :     0.507459 sec
 INFO: No Floating Point Exceptions have been reported
-     2,102,156,543      cycles                           #    2.874 GHz                    
-     3,076,367,109      instructions                     #    1.46  insn per cycle         
-       0.788612885 seconds time elapsed
+     2,120,407,613      cycles                           #    2.883 GHz                    
+     3,024,448,335      instructions                     #    1.43  insn per cycle         
+       0.792985016 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.509899e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.522866e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.522866e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.506015e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.518972e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.518972e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     6.546451 sec
+TOTAL       :     6.556943 sec
 INFO: No Floating Point Exceptions have been reported
-    19,203,967,265      cycles                           #    2.932 GHz                    
-    59,613,462,994      instructions                     #    3.10  insn per cycle         
-       6.550613990 seconds time elapsed
+    19,264,218,294      cycles                           #    2.937 GHz                    
+    59,614,798,383      instructions                     #    3.09  insn per cycle         
+       6.560956742 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  959) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.007790e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.143555e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.143555e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.070356e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.207853e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.207853e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     2.063842 sec
+TOTAL       :     2.048077 sec
 INFO: No Floating Point Exceptions have been reported
-     6,028,053,387      cycles                           #    2.916 GHz                    
-    17,061,568,102      instructions                     #    2.83  insn per cycle         
-       2.067953284 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 5855) (avx2:    0) (512y:    0) (512z:    0)
+     6,023,874,049      cycles                           #    2.936 GHz                    
+    17,061,893,848      instructions                     #    2.83  insn per cycle         
+       2.052246672 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.739489e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.800542e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.800542e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.743575e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.804848e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.804848e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     0.959970 sec
+TOTAL       :     0.957876 sec
 INFO: No Floating Point Exceptions have been reported
-     2,633,014,395      cycles                           #    2.733 GHz                    
-     6,186,422,079      instructions                     #    2.35  insn per cycle         
-       0.964103808 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5091) (512y:    0) (512z:    0)
+     2,640,887,772      cycles                           #    2.747 GHz                    
+     6,187,336,173      instructions                     #    2.34  insn per cycle         
+       0.962119669 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5105) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.914942e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.989247e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.989247e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.915124e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.989470e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.989470e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     0.873915 sec
+TOTAL       :     0.873685 sec
 INFO: No Floating Point Exceptions have been reported
-     2,398,703,934      cycles                           #    2.734 GHz                    
-     5,790,632,910      instructions                     #    2.41  insn per cycle         
-       0.878146083 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4896) (512y:   36) (512z:    0)
+     2,402,820,009      cycles                           #    2.739 GHz                    
+     5,790,162,566      instructions                     #    2.41  insn per cycle         
+       0.877828237 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4906) (512y:   37) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.440202e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.483048e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.483048e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.453255e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.496895e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.496895e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008856e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     1.157768 sec
+TOTAL       :     1.147590 sec
 INFO: No Floating Point Exceptions have been reported
-     2,073,923,596      cycles                           #    1.786 GHz                    
-     3,391,319,853      instructions                     #    1.64  insn per cycle         
-       1.162015056 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2214) (512y:   39) (512z: 3787)
+     2,076,037,431      cycles                           #    1.804 GHz                    
+     3,391,394,333      instructions                     #    1.63  insn per cycle         
+       1.151886126 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2237) (512y:   37) (512z: 3789)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt
index d8e5b06899..1d937591ab 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 02s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-29_23:29:23
+DATE: 2024-09-18_12:54:13
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,21 +53,22 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.767858e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.009815e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.009815e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.452792e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.504415e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.504415e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009071e+02 +- 5.002295e+01 )  GeV^-2
-TOTAL       :     0.464758 sec
+TOTAL       :     0.473960 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     1,927,287,410      cycles                           #    2.859 GHz                    
-     2,875,384,287      instructions                     #    1.49  insn per cycle         
-       0.732402984 seconds time elapsed
+     1,972,315,192      cycles                           #    2.868 GHz                    
+     2,911,549,142      instructions                     #    1.48  insn per cycle         
+       0.746422585 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384)
 WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384)
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 227
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 226
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 .........................................................................
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP=
@@ -83,19 +80,23 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.583680e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.445503e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.445503e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.537799e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.260766e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.260766e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.737499e+02 +- 4.776369e+02 )  GeV^-2
-TOTAL       :     0.650296 sec
+TOTAL       :     0.654007 sec
 INFO: No Floating Point Exceptions have been reported
-     2,502,497,718      cycles                           #    2.846 GHz                    
-     3,902,267,322      instructions                     #    1.56  insn per cycle         
-       0.938009872 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+     2,561,684,908      cycles                           #    2.881 GHz                    
+     3,893,804,940      instructions                     #    1.52  insn per cycle         
+       0.947747663 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -114,20 +115,24 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.507798e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.520716e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.520716e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.511557e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.524668e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.524668e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     6.556234 sec
+TOTAL       :     6.546653 sec
 INFO: No Floating Point Exceptions have been reported
-    19,237,442,122      cycles                           #    2.933 GHz                    
-    59,617,865,061      instructions                     #    3.10  insn per cycle         
-       6.560395287 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+    19,271,491,646      cycles                           #    2.942 GHz                    
+    59,619,016,957      instructions                     #    3.09  insn per cycle         
+       6.550964309 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  959) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,24 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.856144e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.991623e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.991623e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.075407e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.222998e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.222998e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     2.108789 sec
+TOTAL       :     2.052234 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     6,150,269,807      cycles                           #    2.912 GHz                    
-    17,110,455,897      instructions                     #    2.78  insn per cycle         
-       2.113131923 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 5855) (avx2:    0) (512y:    0) (512z:    0)
+     6,045,406,228      cycles                           #    2.940 GHz                    
+    17,110,194,161      instructions                     #    2.83  insn per cycle         
+       2.056632379 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -174,20 +183,24 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.735458e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.798185e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.798185e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.741961e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.804731e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.804731e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     0.966917 sec
+TOTAL       :     0.963728 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,657,777,061      cycles                           #    2.739 GHz                    
-     6,223,729,758      instructions                     #    2.34  insn per cycle         
-       0.971167979 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5091) (512y:    0) (512z:    0)
+     2,665,943,840      cycles                           #    2.756 GHz                    
+     6,224,556,067      instructions                     #    2.33  insn per cycle         
+       0.968076233 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5105) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +217,24 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.907068e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.983078e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.983078e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.889422e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.966151e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.966151e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     0.881736 sec
+TOTAL       :     0.890681 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,422,460,405      cycles                           #    2.736 GHz                    
-     5,827,345,262      instructions                     #    2.41  insn per cycle         
-       0.886046409 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4896) (512y:   36) (512z:    0)
+     2,436,025,235      cycles                           #    2.723 GHz                    
+     5,827,123,635      instructions                     #    2.39  insn per cycle         
+       0.895318545 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4906) (512y:   37) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -234,20 +251,24 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.431727e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.475110e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.475110e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.443092e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.487537e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.487537e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008856e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     1.169788 sec
+TOTAL       :     1.160657 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,100,278,925      cycles                           #    1.790 GHz                    
-     3,432,954,471      instructions                     #    1.63  insn per cycle         
-       1.174173020 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2214) (512y:   39) (512z: 3787)
+     2,101,025,117      cycles                           #    1.805 GHz                    
+     3,433,428,500      instructions                     #    1.63  insn per cycle         
+       1.165027687 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2237) (512y:   37) (512z: 3789)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt
index ae36751a1d..4251937b55 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 49s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-29_22:51:56
+DATE: 2024-09-18_12:15:10
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,17 +49,17 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.277743e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.945397e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.020092e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.677136e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.031870e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.066369e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008472e+02 +- 5.002447e+01 )  GeV^-2
-TOTAL       :     0.449999 sec
+TOTAL       :     0.462735 sec
 INFO: No Floating Point Exceptions have been reported
-     1,921,589,382      cycles                           #    2.855 GHz                    
-     2,723,588,998      instructions                     #    1.42  insn per cycle         
-       0.730462057 seconds time elapsed
+     1,946,975,962      cycles                           #    2.877 GHz                    
+     2,736,807,999      instructions                     #    1.41  insn per cycle         
+       0.735970844 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 221
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 226
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 .........................................................................
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP=
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.994291e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.944236e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.006487e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.680283e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.366147e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.409371e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.630097e+02 +- 4.770717e+02 )  GeV^-2
-TOTAL       :     0.501120 sec
+TOTAL       :     0.508125 sec
 INFO: No Floating Point Exceptions have been reported
-     2,082,733,552      cycles                           #    2.864 GHz                    
-     3,004,954,151      instructions                     #    1.44  insn per cycle         
-       0.784749526 seconds time elapsed
+     2,113,521,464      cycles                           #    2.866 GHz                    
+     3,052,176,829      instructions                     #    1.44  insn per cycle         
+       0.794975331 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.482604e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.495412e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.495412e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.498167e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.510888e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.510888e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     6.618213 sec
+TOTAL       :     6.577268 sec
 INFO: No Floating Point Exceptions have been reported
-    19,382,805,943      cycles                           #    2.927 GHz                    
-    59,350,891,670      instructions                     #    3.06  insn per cycle         
-       6.622341307 seconds time elapsed
+    19,407,580,643      cycles                           #    2.949 GHz                    
+    59,354,263,399      instructions                     #    3.06  insn per cycle         
+       6.581442326 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.395581e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.546831e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.546831e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.398785e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.549497e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.549497e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.009236e+02 +- 5.002643e+01 )  GeV^-2
-TOTAL       :     1.969338 sec
+TOTAL       :     1.968439 sec
 INFO: No Floating Point Exceptions have been reported
-     5,753,637,450      cycles                           #    2.917 GHz                    
-    16,850,481,816      instructions                     #    2.93  insn per cycle         
-       1.973469763 seconds time elapsed
+     5,775,824,576      cycles                           #    2.929 GHz                    
+    16,849,685,670      instructions                     #    2.92  insn per cycle         
+       1.972573842 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 5610) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.511813e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.557982e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.557982e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.527004e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.573961e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.573961e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     1.101839 sec
+TOTAL       :     1.091009 sec
 INFO: No Floating Point Exceptions have been reported
-     3,013,272,940      cycles                           #    2.726 GHz                    
-     6,847,941,445      instructions                     #    2.27  insn per cycle         
-       1.106014161 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5721) (512y:    0) (512z:    0)
+     3,021,095,483      cycles                           #    2.760 GHz                    
+     6,848,870,145      instructions                     #    2.27  insn per cycle         
+       1.095189540 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5735) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.628019e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.682162e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.682162e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.611080e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.664155e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.664155e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008857e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     1.024330 sec
+TOTAL       :     1.035237 sec
 INFO: No Floating Point Exceptions have been reported
-     2,804,905,619      cycles                           #    2.729 GHz                    
-     6,436,635,506      instructions                     #    2.29  insn per cycle         
-       1.028506828 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5497) (512y:   22) (512z:    0)
+     2,858,508,214      cycles                           #    2.752 GHz                    
+     6,438,110,737      instructions                     #    2.25  insn per cycle         
+       1.039480125 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5509) (512y:   23) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.315584e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.351667e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.351667e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.329594e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.366106e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.366106e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008856e+02 +- 5.002468e+01 )  GeV^-2
-TOTAL       :     1.265651 sec
+TOTAL       :     1.252535 sec
 INFO: No Floating Point Exceptions have been reported
-     2,253,548,105      cycles                           #    1.776 GHz                    
-     3,754,786,325      instructions                     #    1.67  insn per cycle         
-       1.269807096 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2445) (512y:   29) (512z: 4082)
+     2,255,457,879      cycles                           #    1.796 GHz                    
+     3,755,585,205      instructions                     #    1.67  insn per cycle         
+       1.256791945 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2467) (512y:   28) (512z: 4084)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
index 16d585d797..09551986c9 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 56s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-29_22:52:17
+DATE: 2024-09-18_12:15:31
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.764002e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.041698e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.055474e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.594581e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.871606e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.970951e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     0.471629 sec
+TOTAL       :     0.473359 sec
 INFO: No Floating Point Exceptions have been reported
-     1,944,394,459      cycles                           #    2.835 GHz                    
-     2,789,943,135      instructions                     #    1.43  insn per cycle         
-       0.744922955 seconds time elapsed
+     1,992,272,995      cycles                           #    2.881 GHz                    
+     2,873,441,271      instructions                     #    1.44  insn per cycle         
+       0.748198068 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.090832e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.317008e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.330502e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.037058e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.222746e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.233058e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
-TOTAL       :     0.609477 sec
+TOTAL       :     0.613274 sec
 INFO: No Floating Point Exceptions have been reported
-     2,404,259,561      cycles                           #    2.844 GHz                    
-     3,644,904,805      instructions                     #    1.52  insn per cycle         
-       0.905916250 seconds time elapsed
+     2,464,972,790      cycles                           #    2.891 GHz                    
+     3,748,511,486      instructions                     #    1.52  insn per cycle         
+       0.912099116 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.377685e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.389366e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.389366e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.399672e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.411896e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.411896e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     6.911680 sec
+TOTAL       :     6.848810 sec
 INFO: No Floating Point Exceptions have been reported
-    20,218,013,208      cycles                           #    2.924 GHz                    
-    60,950,520,452      instructions                     #    3.01  insn per cycle         
-       6.915909091 seconds time elapsed
+    20,197,037,339      cycles                           #    2.948 GHz                    
+    60,947,415,438      instructions                     #    3.02  insn per cycle         
+       6.853052511 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.609877e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.653027e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.653027e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.642526e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.687090e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.687090e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     3.574180 sec
+TOTAL       :     3.549259 sec
 INFO: No Floating Point Exceptions have been reported
-    10,450,566,835      cycles                           #    2.921 GHz                    
-    30,821,772,550      instructions                     #    2.95  insn per cycle         
-       3.578458596 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 5350) (avx2:    0) (512y:    0) (512z:    0)
+    10,477,481,501      cycles                           #    2.949 GHz                    
+    30,820,930,825      instructions                     #    2.94  insn per cycle         
+       3.553666211 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 5351) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.114697e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.284002e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.284002e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.196444e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.370621e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.370621e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.817728 sec
+TOTAL       :     1.802004 sec
 INFO: No Floating Point Exceptions have been reported
-     4,954,203,602      cycles                           #    2.720 GHz                    
-    11,359,039,929      instructions                     #    2.29  insn per cycle         
-       1.821986435 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4764) (512y:    0) (512z:    0)
+     4,965,652,288      cycles                           #    2.750 GHz                    
+    11,359,248,854      instructions                     #    2.29  insn per cycle         
+       1.806342805 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4776) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.033408e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.054510e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.054510e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.041756e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.063436e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.063436e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.605536 sec
+TOTAL       :     1.592695 sec
 INFO: No Floating Point Exceptions have been reported
-     4,392,902,582      cycles                           #    2.730 GHz                    
-    10,608,545,501      instructions                     #    2.41  insn per cycle         
-       1.609849025 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4491) (512y:   83) (512z:    0)
+     4,382,366,442      cycles                           #    2.746 GHz                    
+    10,608,797,295      instructions                     #    2.42  insn per cycle         
+       1.596978533 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4503) (512y:   84) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.852292e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.948447e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.948447e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.957560e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.055998e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.055998e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     2.413629 sec
+TOTAL       :     2.377375 sec
 INFO: No Floating Point Exceptions have been reported
-     4,253,955,957      cycles                           #    1.760 GHz                    
-     6,166,525,936      instructions                     #    1.45  insn per cycle         
-       2.418081364 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2117) (512y:  117) (512z: 3652)
+     4,237,877,454      cycles                           #    1.780 GHz                    
+     6,168,521,326      instructions                     #    1.46  insn per cycle         
+       2.381770690 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2143) (512y:  116) (512z: 3653)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt
index fa2257a7a9..e31dab3bcb 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 50s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
 
-DATE: 2024-08-29_22:52:43
+DATE: 2024-09-18_12:15:57
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.722618e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.034655e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.047991e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.665772e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.933205e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.041936e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     0.466796 sec
+TOTAL       :     0.474243 sec
 INFO: No Floating Point Exceptions have been reported
-     1,953,188,947      cycles                           #    2.854 GHz                    
-     2,820,575,421      instructions                     #    1.44  insn per cycle         
-       0.741421075 seconds time elapsed
+     1,991,405,654      cycles                           #    2.879 GHz                    
+     2,864,466,394      instructions                     #    1.44  insn per cycle         
+       0.749070557 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.084249e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.309437e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.322993e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.042210e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.228789e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.239192e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 6.734461e+02 +- 4.775415e+02 )  GeV^-2
-TOTAL       :     0.605761 sec
+TOTAL       :     0.610923 sec
 INFO: No Floating Point Exceptions have been reported
-     2,416,308,048      cycles                           #    2.845 GHz                    
-     3,608,210,606      instructions                     #    1.49  insn per cycle         
-       0.910078060 seconds time elapsed
+     2,452,672,007      cycles                           #    2.882 GHz                    
+     3,739,836,978      instructions                     #    1.52  insn per cycle         
+       0.910769372 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.377305e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.389054e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.389054e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.379932e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.391571e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.391571e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     6.912722 sec
+TOTAL       :     6.905036 sec
 INFO: No Floating Point Exceptions have been reported
-    20,223,820,603      cycles                           #    2.924 GHz                    
-    61,171,685,276      instructions                     #    3.02  insn per cycle         
-       6.916884719 seconds time elapsed
+    20,270,175,803      cycles                           #    2.935 GHz                    
+    61,175,514,110      instructions                     #    3.02  insn per cycle         
+       6.909213331 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.615705e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.659353e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.659353e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.712701e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.757964e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.757964e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     3.569504 sec
+TOTAL       :     3.496353 sec
 INFO: No Floating Point Exceptions have been reported
-    10,370,296,215      cycles                           #    2.902 GHz                    
-    30,533,837,695      instructions                     #    2.94  insn per cycle         
-       3.573867995 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 5154) (avx2:    0) (512y:    0) (512z:    0)
+    10,330,450,764      cycles                           #    2.952 GHz                    
+    30,532,965,755      instructions                     #    2.96  insn per cycle         
+       3.500721812 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 5155) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.737237e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.894376e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.894376e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.873461e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.031366e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.031366e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.895286 sec
+TOTAL       :     1.866310 sec
 INFO: No Floating Point Exceptions have been reported
-     5,142,071,518      cycles                           #    2.708 GHz                    
-    11,872,872,158      instructions                     #    2.31  insn per cycle         
-       1.899520112 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4875) (512y:    0) (512z:    0)
+     5,149,448,063      cycles                           #    2.754 GHz                    
+    11,872,714,422      instructions                     #    2.31  insn per cycle         
+       1.870704205 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4887) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.694774e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.881575e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.881575e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.721277e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.910902e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.910902e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     1.709775 sec
+TOTAL       :     1.705742 sec
 INFO: No Floating Point Exceptions have been reported
-     4,671,739,679      cycles                           #    2.727 GHz                    
-    11,166,170,928      instructions                     #    2.39  insn per cycle         
-       1.713950914 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4496) (512y:  238) (512z:    0)
+     4,682,307,882      cycles                           #    2.740 GHz                    
+    11,166,992,215      instructions                     #    2.38  insn per cycle         
+       1.710031590 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4508) (512y:  239) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.779046e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.874544e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.874544e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.916313e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.015099e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.015099e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.008920e+02 +- 5.001681e+01 )  GeV^-2
-TOTAL       :     2.439248 sec
+TOTAL       :     2.391177 sec
 INFO: No Floating Point Exceptions have been reported
-     4,266,577,740      cycles                           #    1.747 GHz                    
-     6,406,035,694      instructions                     #    1.50  insn per cycle         
-       2.443561271 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2013) (512y:  163) (512z: 3730)
+     4,255,173,095      cycles                           #    1.777 GHz                    
+     6,409,630,981      instructions                     #    1.51  insn per cycle         
+       2.395610797 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2039) (512y:  162) (512z: 3731)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
index ac422c575d..e60a3b56f2 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 15s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_22:53:09
+DATE: 2024-09-18_12:16:23
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.484618e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.512495e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.514715e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.313288e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.338946e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.340893e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.529842 sec
+TOTAL       :     0.535030 sec
 INFO: No Floating Point Exceptions have been reported
-     2,204,935,727      cycles                           #    2.851 GHz                    
-     3,383,078,845      instructions                     #    1.53  insn per cycle         
-       0.834801026 seconds time elapsed
+     2,203,915,832      cycles                           #    2.863 GHz                    
+     3,411,363,725      instructions                     #    1.55  insn per cycle         
+       0.826937803 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.112342e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.144521e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.145898e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.139082e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.168902e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.170140e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.050717 sec
+TOTAL       :     3.048810 sec
 INFO: No Floating Point Exceptions have been reported
-     9,614,749,166      cycles                           #    2.908 GHz                    
-    22,078,344,118      instructions                     #    2.30  insn per cycle         
-       3.370393570 seconds time elapsed
+     9,673,114,822      cycles                           #    2.925 GHz                    
+    22,022,328,349      instructions                     #    2.28  insn per cycle         
+       3.363974995 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.860935e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.861845e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.861845e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.884766e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.885678e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.885678e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.819009 sec
+TOTAL       :     8.707888 sec
 INFO: No Floating Point Exceptions have been reported
-    25,677,829,436      cycles                           #    2.911 GHz                    
-    78,956,070,121      instructions                     #    3.07  insn per cycle         
-       8.823278932 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2:    0) (512y:    0) (512z:    0)
+    25,646,480,577      cycles                           #    2.944 GHz                    
+    78,959,199,970      instructions                     #    3.08  insn per cycle         
+       8.712344144 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.514741e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.517882e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.517882e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.525938e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.529103e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.529103e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.672391 sec
+TOTAL       :     4.657640 sec
 INFO: No Floating Point Exceptions have been reported
-    13,098,570,246      cycles                           #    2.802 GHz                    
-    39,561,785,502      instructions                     #    3.02  insn per cycle         
-       4.676673381 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2:    0) (512y:    0) (512z:    0)
+    13,102,337,051      cycles                           #    2.811 GHz                    
+    39,559,050,978      instructions                     #    3.02  insn per cycle         
+       4.662071177 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.018608e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.034749e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.034749e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.037518e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.054750e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.054750e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.051675 sec
+TOTAL       :     2.047044 sec
 INFO: No Floating Point Exceptions have been reported
-     5,588,693,407      cycles                           #    2.720 GHz                    
-    13,823,467,867      instructions                     #    2.47  insn per cycle         
-       2.055974957 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11530) (512y:    0) (512z:    0)
+     5,613,016,028      cycles                           #    2.737 GHz                    
+    13,823,575,120      instructions                     #    2.46  insn per cycle         
+       2.051472192 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11520) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.045058e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.066544e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.066544e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.172996e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.194283e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.194283e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.819352 sec
+TOTAL       :     1.794478 sec
 INFO: No Floating Point Exceptions have been reported
-     4,944,679,303      cycles                           #    2.713 GHz                    
-    12,505,400,088      instructions                     #    2.53  insn per cycle         
-       1.823660626 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10449) (512y:   88) (512z:    0)
+     4,922,583,154      cycles                           #    2.738 GHz                    
+    12,506,595,932      instructions                     #    2.54  insn per cycle         
+       1.798855063 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10439) (512y:   89) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.925584e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.938157e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.938157e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.987584e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.999990e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.999990e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.374676 sec
+TOTAL       :     2.353771 sec
 INFO: No Floating Point Exceptions have been reported
-     4,146,979,457      cycles                           #    1.744 GHz                    
-     6,391,143,672      instructions                     #    1.54  insn per cycle         
-       2.379203599 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1974) (512y:  102) (512z: 9391)
+     4,138,447,690      cycles                           #    1.756 GHz                    
+     6,393,230,519      instructions                     #    1.54  insn per cycle         
+       2.358130141 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1978) (512y:  101) (512z: 9386)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt
index 623974ef1b..40b573a43c 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 15s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:30:10
+DATE: 2024-09-18_12:55:00
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,15 +53,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.098336e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.430669e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.430669e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.976623e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.275789e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.275789e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.516355 sec
+TOTAL       :     0.524913 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,145,035,124      cycles                           #    2.878 GHz                    
-     3,433,359,795      instructions                     #    1.60  insn per cycle         
-       0.805072037 seconds time elapsed
+     2,187,535,830      cycles                           #    2.870 GHz                    
+     3,393,578,749      instructions                     #    1.55  insn per cycle         
+       0.821118226 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -83,19 +80,23 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.640626e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.130604e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.130604e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.647884e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.131075e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.131075e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.319603 sec
+TOTAL       :     3.300824 sec
 INFO: No Floating Point Exceptions have been reported
-    10,448,466,366      cycles                           #    2.908 GHz                    
-    23,059,789,642      instructions                     #    2.21  insn per cycle         
-       3.652051020 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+    10,456,540,936      cycles                           #    2.924 GHz                    
+    23,609,445,897      instructions                     #    2.26  insn per cycle         
+       3.631983066 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -114,20 +115,24 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.870734e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.871640e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.871640e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.884487e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.885391e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.885391e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.777660 sec
+TOTAL       :     8.714099 sec
 INFO: No Floating Point Exceptions have been reported
-    25,686,292,856      cycles                           #    2.925 GHz                    
-    78,962,903,501      instructions                     #    3.07  insn per cycle         
-       8.782134787 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2:    0) (512y:    0) (512z:    0)
+INFO: No Floating Point Exceptions have been reported
+    25,667,183,320      cycles                           #    2.944 GHz                    
+    78,962,641,614      instructions                     #    3.08  insn per cycle         
+       8.718743024 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,24 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.525474e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.528850e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.528850e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.526771e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.530011e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.530011e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.662189 sec
+TOTAL       :     4.660484 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    13,113,811,865      cycles                           #    2.811 GHz                    
-    39,572,448,827      instructions                     #    3.02  insn per cycle         
-       4.666654670 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2:    0) (512y:    0) (512z:    0)
+    13,111,732,509      cycles                           #    2.811 GHz                    
+    39,572,349,146      instructions                     #    3.02  insn per cycle         
+       4.665178116 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -174,20 +183,24 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.019644e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.036695e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.036695e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.088722e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.106050e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.106050e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.055512 sec
+TOTAL       :     2.038539 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     5,607,614,115      cycles                           #    2.723 GHz                    
-    13,835,145,765      instructions                     #    2.47  insn per cycle         
-       2.060008383 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11530) (512y:    0) (512z:    0)
+     5,622,073,957      cycles                           #    2.753 GHz                    
+    13,834,285,866      instructions                     #    2.46  insn per cycle         
+       2.043264664 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11520) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +217,24 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.095889e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.118002e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.118002e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.180474e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.202665e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.202665e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.813580 sec
+TOTAL       :     1.797187 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     4,957,865,514      cycles                           #    2.728 GHz                    
-    12,515,643,969      instructions                     #    2.52  insn per cycle         
-       1.818064103 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10449) (512y:   88) (512z:    0)
+     4,937,816,969      cycles                           #    2.742 GHz                    
+    12,516,988,109      instructions                     #    2.53  insn per cycle         
+       1.801993078 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10439) (512y:   89) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -234,20 +251,24 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.877189e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.889576e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.889576e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.979563e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.992901e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.992901e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.395674 sec
+TOTAL       :     2.360775 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     4,163,517,757      cycles                           #    1.735 GHz                    
-     6,402,780,090      instructions                     #    1.54  insn per cycle         
-       2.400249588 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1974) (512y:  102) (512z: 9391)
+     4,161,078,836      cycles                           #    1.760 GHz                    
+     6,405,054,232      instructions                     #    1.54  insn per cycle         
+       2.365459011 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1978) (512y:  101) (512z: 9386)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt
index b0e7b2010b..14d3e456fd 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 01s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:40:33
+DATE: 2024-09-18_13:06:00
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.439093e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.464818e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.467330e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.295730e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.322229e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.324249e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     0.513287 sec
+TOTAL       :     0.520677 sec
 INFO: No Floating Point Exceptions have been reported
-     2,137,026,227      cycles                           #    2.862 GHz                    
-     3,337,808,086      instructions                     #    1.56  insn per cycle         
-       0.808256494 seconds time elapsed
+     2,148,428,756      cycles                           #    2.846 GHz                    
+     3,383,382,873      instructions                     #    1.57  insn per cycle         
+       0.814124974 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.146669e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.179083e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.180475e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.133518e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.163906e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.165159e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.252232e+02 +- 1.234346e+02 )  GeV^-4
-TOTAL       :     3.125773 sec
+TOTAL       :     3.142841 sec
 INFO: No Floating Point Exceptions have been reported
-     9,801,640,150      cycles                           #    2.901 GHz                    
-    22,336,783,397      instructions                     #    2.28  insn per cycle         
-       3.434371661 seconds time elapsed
+     9,913,272,493      cycles                           #    2.915 GHz                    
+    21,406,834,972      instructions                     #    2.16  insn per cycle         
+       3.457413936 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.869360e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.870267e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.870267e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.883085e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.883982e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.883982e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     8.781002 sec
+TOTAL       :     8.717475 sec
 INFO: No Floating Point Exceptions have been reported
-    25,678,377,688      cycles                           #    2.923 GHz                    
-    78,955,720,202      instructions                     #    3.07  insn per cycle         
-       8.785102867 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2:    0) (512y:    0) (512z:    0)
+    25,650,608,073      cycles                           #    2.942 GHz                    
+    78,955,783,568      instructions                     #    3.08  insn per cycle         
+       8.721774217 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.506291e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.509424e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.509424e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.525568e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.528763e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.528763e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     4.685167 sec
+TOTAL       :     4.659766 sec
 INFO: No Floating Point Exceptions have been reported
-    13,112,847,720      cycles                           #    2.797 GHz                    
-    39,561,253,407      instructions                     #    3.02  insn per cycle         
-       4.689284646 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2:    0) (512y:    0) (512z:    0)
+    13,093,152,498      cycles                           #    2.808 GHz                    
+    39,558,598,891      instructions                     #    3.02  insn per cycle         
+       4.664046020 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.042071e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.058349e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.058349e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.044580e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.060991e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.060991e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     2.047508 sec
+TOTAL       :     2.046898 sec
 INFO: No Floating Point Exceptions have been reported
-     5,595,348,153      cycles                           #    2.728 GHz                    
-    13,822,442,672      instructions                     #    2.47  insn per cycle         
-       2.051861141 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11530) (512y:    0) (512z:    0)
+     5,615,094,940      cycles                           #    2.739 GHz                    
+    13,822,846,005      instructions                     #    2.46  insn per cycle         
+       2.051140101 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11520) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.026384e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.047273e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.047273e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.166765e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.187474e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.187474e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     1.824976 sec
+TOTAL       :     1.797318 sec
 INFO: No Floating Point Exceptions have been reported
-     4,951,647,688      cycles                           #    2.708 GHz                    
-    12,503,474,172      instructions                     #    2.53  insn per cycle         
-       1.829249158 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10449) (512y:   88) (512z:    0)
+     4,920,345,742      cycles                           #    2.732 GHz                    
+    12,503,437,535      instructions                     #    2.54  insn per cycle         
+       1.801597465 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10439) (512y:   89) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.899169e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.911871e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.911871e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.991343e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.003364e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.003364e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.197467e-01 +- 3.250467e-01 )  GeV^-4
-TOTAL       :     2.385278 sec
+TOTAL       :     2.354011 sec
 INFO: No Floating Point Exceptions have been reported
-     4,153,864,303      cycles                           #    1.739 GHz                    
-     6,389,239,536      instructions                     #    1.54  insn per cycle         
-       2.389608459 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1974) (512y:  102) (512z: 9391)
+     4,141,327,319      cycles                           #    1.757 GHz                    
+     6,390,315,143      instructions                     #    1.54  insn per cycle         
+       2.358468154 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1978) (512y:  101) (512z: 9386)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt
index f54ab4104d..5ee24db0d6 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 00s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:37:42
+DATE: 2024-09-18_13:03:08
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.452375e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.478341e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.480594e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.306466e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.330597e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.332575e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.510857 sec
+TOTAL       :     0.519642 sec
 INFO: No Floating Point Exceptions have been reported
-     2,125,596,224      cycles                           #    2.863 GHz                    
-     3,340,543,841      instructions                     #    1.57  insn per cycle         
-       0.802256972 seconds time elapsed
+     2,158,072,451      cycles                           #    2.867 GHz                    
+     3,417,619,447      instructions                     #    1.58  insn per cycle         
+       0.812681086 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.146533e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.179036e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.180439e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.138609e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.169214e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.170473e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.069204 sec
+TOTAL       :     3.082389 sec
 INFO: No Floating Point Exceptions have been reported
-     9,615,019,528      cycles                           #    2.895 GHz                    
-    20,428,439,311      instructions                     #    2.12  insn per cycle         
-       3.379551964 seconds time elapsed
+     9,772,793,139      cycles                           #    2.928 GHz                    
+    21,341,571,954      instructions                     #    2.18  insn per cycle         
+       3.396434043 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.851531e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.852432e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.852432e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.882405e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.883286e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.883286e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.863900 sec
+TOTAL       :     8.718737 sec
 INFO: No Floating Point Exceptions have been reported
-    25,674,607,651      cycles                           #    2.896 GHz                    
-    78,957,122,456      instructions                     #    3.08  insn per cycle         
-       8.868072639 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2:    0) (512y:    0) (512z:    0)
+    25,644,402,521      cycles                           #    2.941 GHz                    
+    78,955,748,181      instructions                     #    3.08  insn per cycle         
+       8.722884517 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.525536e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.528790e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.528790e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.546461e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.549706e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.549706e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.658062 sec
+TOTAL       :     4.630610 sec
 INFO: No Floating Point Exceptions have been reported
-    13,094,220,368      cycles                           #    2.809 GHz                    
-    39,559,236,862      instructions                     #    3.02  insn per cycle         
-       4.662388664 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2:    0) (512y:    0) (512z:    0)
+    13,065,342,955      cycles                           #    2.819 GHz                    
+    39,558,576,157      instructions                     #    3.03  insn per cycle         
+       4.634990681 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.024421e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.040536e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.040536e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.040236e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.056583e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.056583e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.050098 sec
+TOTAL       :     2.046187 sec
 INFO: No Floating Point Exceptions have been reported
-     5,590,810,484      cycles                           #    2.723 GHz                    
-    13,823,415,124      instructions                     #    2.47  insn per cycle         
-       2.054333314 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11530) (512y:    0) (512z:    0)
+     5,614,658,100      cycles                           #    2.739 GHz                    
+    13,823,752,036      instructions                     #    2.46  insn per cycle         
+       2.050408597 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11520) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.001072e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.021748e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.021748e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.219242e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.241638e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.241638e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.828463 sec
+TOTAL       :     1.785361 sec
 INFO: No Floating Point Exceptions have been reported
-     4,944,586,506      cycles                           #    2.700 GHz                    
-    12,505,236,539      instructions                     #    2.53  insn per cycle         
-       1.832828782 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10449) (512y:   88) (512z:    0)
+     4,914,777,250      cycles                           #    2.747 GHz                    
+    12,505,304,491      instructions                     #    2.54  insn per cycle         
+       1.789665479 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10439) (512y:   89) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.916008e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.928185e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.928185e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.996219e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.008754e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.008754e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.377965 sec
+TOTAL       :     2.350947 sec
 INFO: No Floating Point Exceptions have been reported
-     4,144,212,852      cycles                           #    1.740 GHz                    
-     6,391,211,532      instructions                     #    1.54  insn per cycle         
-       2.382275809 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1974) (512y:  102) (512z: 9391)
+     4,136,746,733      cycles                           #    1.757 GHz                    
+     6,392,473,320      instructions                     #    1.55  insn per cycle         
+       2.355422920 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1978) (512y:  101) (512z: 9386)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt
index 120d5447ee..bbefe2a8e4 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 01s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:34:57
+DATE: 2024-09-18_13:00:21
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -54,15 +50,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.168442e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.482839e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.485319e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.061167e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.349241e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.351100e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.513830 sec
+TOTAL       :     0.520345 sec
 INFO: No Floating Point Exceptions have been reported
-     2,147,728,669      cycles                           #    2.867 GHz                    
-     3,437,039,893      instructions                     #    1.60  insn per cycle         
-       0.808379384 seconds time elapsed
+     2,166,689,738      cycles                           #    2.876 GHz                    
+     3,445,065,863      instructions                     #    1.59  insn per cycle         
+       0.812996601 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
@@ -74,19 +70,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.728769e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.181058e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.182447e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.727515e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.166918e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.168161e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.209844 sec
+TOTAL       :     3.213299 sec
 INFO: No Floating Point Exceptions have been reported
-     9,982,588,155      cycles                           #    2.885 GHz                    
-    22,074,605,131      instructions                     #    2.21  insn per cycle         
-       3.519251437 seconds time elapsed
+    10,170,057,078      cycles                           #    2.920 GHz                    
+    23,084,015,508      instructions                     #    2.27  insn per cycle         
+       3.538645884 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -104,20 +103,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.870620e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.871515e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.871515e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.884169e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.885066e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.885066e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.773542 sec
+TOTAL       :     8.710738 sec
 INFO: No Floating Point Exceptions have been reported
-    25,656,425,077      cycles                           #    2.923 GHz                    
-    78,955,689,357      instructions                     #    3.08  insn per cycle         
-       8.777616977 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2:    0) (512y:    0) (512z:    0)
+    25,636,302,572      cycles                           #    2.942 GHz                    
+    78,955,597,829      instructions                     #    3.08  insn per cycle         
+       8.714991120 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -133,20 +135,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.499221e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.502418e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.502418e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.516471e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.519747e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.519747e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.692707 sec
+TOTAL       :     4.670214 sec
 INFO: No Floating Point Exceptions have been reported
-    13,103,874,690      cycles                           #    2.791 GHz                    
-    39,562,042,319      instructions                     #    3.02  insn per cycle         
-       4.696908780 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2:    0) (512y:    0) (512z:    0)
+    13,077,998,657      cycles                           #    2.798 GHz                    
+    39,560,581,640      instructions                     #    3.02  insn per cycle         
+       4.674479243 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -162,20 +167,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.035218e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.052032e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.052032e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.487681e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.501903e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.501903e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.047329 sec
+TOTAL       :     2.196522 sec
 INFO: No Floating Point Exceptions have been reported
-     5,586,651,910      cycles                           #    2.724 GHz                    
-    13,823,431,614      instructions                     #    2.47  insn per cycle         
-       2.051685897 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11530) (512y:    0) (512z:    0)
+     6,031,690,352      cycles                           #    2.742 GHz                    
+    13,823,991,565      instructions                     #    2.29  insn per cycle         
+       2.200855114 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11520) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -191,20 +199,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.106245e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.127457e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.127457e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.160431e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.182355e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.182355e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.807636 sec
+TOTAL       :     1.796714 sec
 INFO: No Floating Point Exceptions have been reported
-     4,945,546,431      cycles                           #    2.731 GHz                    
-    12,506,428,636      instructions                     #    2.53  insn per cycle         
-       1.811855898 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10449) (512y:   88) (512z:    0)
+     4,915,575,489      cycles                           #    2.731 GHz                    
+    12,505,831,482      instructions                     #    2.54  insn per cycle         
+       1.801025403 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10439) (512y:   89) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -220,20 +231,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.912294e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.924724e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.924724e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.864510e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.876687e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.876687e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.378992 sec
+TOTAL       :     2.395867 sec
 INFO: No Floating Point Exceptions have been reported
-     4,143,588,187      cycles                           #    1.739 GHz                    
-     6,391,287,965      instructions                     #    1.54  insn per cycle         
-       2.383250915 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1974) (512y:  102) (512z: 9391)
+     4,162,633,573      cycles                           #    1.735 GHz                    
+     6,392,322,352      instructions                     #    1.54  insn per cycle         
+       2.400290914 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1978) (512y:  101) (512z: 9386)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt
index 4c1e452219..724af1477d 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 53s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_22:53:42
+DATE: 2024-09-18_12:16:57
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.474633e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.502711e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.505044e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.313099e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.338786e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.340713e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.525987 sec
+TOTAL       :     0.536981 sec
 INFO: No Floating Point Exceptions have been reported
-     2,173,141,636      cycles                           #    2.870 GHz                    
-     3,442,650,944      instructions                     #    1.58  insn per cycle         
-       0.816470213 seconds time elapsed
+     2,207,441,775      cycles                           #    2.862 GHz                    
+     3,435,949,472      instructions                     #    1.56  insn per cycle         
+       0.828882621 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.142376e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.175014e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.176392e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.143532e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.173218e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.174465e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.027490 sec
+TOTAL       :     3.036857 sec
 INFO: No Floating Point Exceptions have been reported
-     9,517,475,939      cycles                           #    2.901 GHz                    
-    19,988,736,260      instructions                     #    2.10  insn per cycle         
-       3.340146090 seconds time elapsed
+     9,590,647,679      cycles                           #    2.910 GHz                    
+    22,042,753,111      instructions                     #    2.30  insn per cycle         
+       3.351786655 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.876866e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.877757e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.877757e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.884971e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.885850e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.885850e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.744259 sec
+TOTAL       :     8.706995 sec
 INFO: No Floating Point Exceptions have been reported
-    25,630,018,477      cycles                           #    2.931 GHz                    
-    78,700,967,648      instructions                     #    3.07  insn per cycle         
-       8.748458419 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 4192) (avx2:    0) (512y:    0) (512z:    0)
+    25,617,517,247      cycles                           #    2.941 GHz                    
+    78,701,000,615      instructions                     #    3.07  insn per cycle         
+       8.711338338 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 4191) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.529280e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.532488e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.532488e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.566075e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.569356e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.569356e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.653045 sec
+TOTAL       :     4.605078 sec
 INFO: No Floating Point Exceptions have been reported
-    13,049,773,576      cycles                           #    2.803 GHz                    
-    39,452,267,607      instructions                     #    3.02  insn per cycle         
-       4.657264479 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:12973) (avx2:    0) (512y:    0) (512z:    0)
+    13,036,001,618      cycles                           #    2.829 GHz                    
+    39,449,493,817      instructions                     #    3.03  insn per cycle         
+       4.609408106 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:12966) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.952727e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.968616e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.968616e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.966836e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.982546e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.982546e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.068541 sec
+TOTAL       :     2.064889 sec
 INFO: No Floating Point Exceptions have been reported
-     5,654,997,353      cycles                           #    2.729 GHz                    
-    13,909,812,029      instructions                     #    2.46  insn per cycle         
-       2.072950411 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11592) (512y:    0) (512z:    0)
+     5,676,808,859      cycles                           #    2.745 GHz                    
+    13,911,294,100      instructions                     #    2.45  insn per cycle         
+       2.069253381 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11582) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.933565e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.954089e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.954089e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.081065e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.102389e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.102389e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.842015 sec
+TOTAL       :     1.812330 sec
 INFO: No Floating Point Exceptions have been reported
-     4,995,833,498      cycles                           #    2.707 GHz                    
-    12,603,386,160      instructions                     #    2.52  insn per cycle         
-       1.846187939 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10433) (512y:  240) (512z:    0)
+     4,986,765,093      cycles                           #    2.746 GHz                    
+    12,602,417,777      instructions                     #    2.53  insn per cycle         
+       1.816710814 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10423) (512y:  241) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.928234e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.940575e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.940575e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.944688e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.956851e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.956851e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.373698 sec
+TOTAL       :     2.368181 sec
 INFO: No Floating Point Exceptions have been reported
-     4,151,141,696      cycles                           #    1.746 GHz                    
-     6,499,236,974      instructions                     #    1.57  insn per cycle         
-       2.378031241 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1750) (512y:  194) (512z: 9387)
+     4,157,079,693      cycles                           #    1.753 GHz                    
+     6,500,343,598      instructions                     #    1.56  insn per cycle         
+       2.372472342 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1754) (512y:  193) (512z: 9382)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt
index 7af0451fcc..9c62ee596f 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 02m 33s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:20:39
+DATE: 2024-09-18_12:45:20
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.254301e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.279795e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.282561e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.107911e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.129674e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.131105e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.538286 sec
+TOTAL       :     0.542394 sec
 INFO: No Floating Point Exceptions have been reported
-     2,191,082,086      cycles                           #    2.854 GHz                    
-     3,355,829,429      instructions                     #    1.53  insn per cycle         
-       0.826481038 seconds time elapsed
+     2,239,231,771      cycles                           #    2.882 GHz                    
+     3,498,325,403      instructions                     #    1.56  insn per cycle         
+       0.833980513 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.754882e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.783344e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.784543e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.758554e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.783710e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.784685e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.311612 sec
+TOTAL       :     3.309606 sec
 INFO: No Floating Point Exceptions have been reported
-    10,400,680,075      cycles                           #    2.916 GHz                    
-    23,997,882,975      instructions                     #    2.31  insn per cycle         
-       3.622738301 seconds time elapsed
+    10,428,358,714      cycles                           #    2.922 GHz                    
+    23,876,781,455      instructions                     #    2.29  insn per cycle         
+       3.623869439 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.237636e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.238116e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.238116e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.278929e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.279396e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.279396e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :    38.708076 sec
+TOTAL       :    38.335228 sec
 INFO: No Floating Point Exceptions have been reported
-   113,002,307,863      cycles                           #    2.919 GHz                    
-   144,865,531,987      instructions                     #    1.28  insn per cycle         
-      38.712493320 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:21361) (avx2:    0) (512y:    0) (512z:    0)
+   112,569,296,340      cycles                           #    2.936 GHz                    
+   144,793,904,773      instructions                     #    1.29  insn per cycle         
+      38.339626690 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:21273) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.074049e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.076486e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.076486e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.146613e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.149188e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.149188e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     5.341332 sec
+TOTAL       :     5.218296 sec
 INFO: No Floating Point Exceptions have been reported
-    14,754,166,844      cycles                           #    2.761 GHz                    
-    37,648,707,475      instructions                     #    2.55  insn per cycle         
-       5.345720009 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:68253) (avx2:    0) (512y:    0) (512z:    0)
+    14,745,365,482      cycles                           #    2.824 GHz                    
+    37,604,718,701      instructions                     #    2.55  insn per cycle         
+       5.222619147 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:68172) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.336087e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.349641e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.349641e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.373915e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.387237e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.387237e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.242124 sec
+TOTAL       :     2.230777 sec
 INFO: No Floating Point Exceptions have been reported
-     6,124,700,045      cycles                           #    2.727 GHz                    
-    13,060,839,816      instructions                     #    2.13  insn per cycle         
-       2.246622192 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:46965) (512y:    0) (512z:    0)
+     6,114,551,945      cycles                           #    2.737 GHz                    
+    13,052,964,850      instructions                     #    2.13  insn per cycle         
+       2.235150749 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:46946) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.868994e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.889122e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.889122e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.869797e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.889489e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.889489e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.855494 sec
+TOTAL       :     1.855437 sec
 INFO: No Floating Point Exceptions have been reported
-     5,071,321,771      cycles                           #    2.728 GHz                    
-    11,453,092,735      instructions                     #    2.26  insn per cycle         
-       1.859840409 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:40490) (512y:  285) (512z:    0)
+     5,079,069,827      cycles                           #    2.732 GHz                    
+    11,450,297,808      instructions                     #    2.25  insn per cycle         
+       1.859852844 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:40486) (512y:  285) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.280272e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.293880e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.293880e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.334322e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.348410e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.348410e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.259563 sec
+TOTAL       :     2.242689 sec
 INFO: No Floating Point Exceptions have been reported
-     3,952,850,673      cycles                           #    1.747 GHz                    
-     5,926,868,768      instructions                     #    1.50  insn per cycle         
-       2.264082563 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2432) (512y:  337) (512z:39348)
+     3,955,754,497      cycles                           #    1.761 GHz                    
+     5,927,045,148      instructions                     #    1.50  insn per cycle         
+       2.247181135 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2444) (512y:  337) (512z:39338)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt
index db961be493..af0b172ab7 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 02m 20s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:21:47
+DATE: 2024-09-18_12:46:29
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.262031e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.287266e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.289567e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.101802e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.121265e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.122962e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.540687 sec
+TOTAL       :     0.540843 sec
 INFO: No Floating Point Exceptions have been reported
-     2,167,162,043      cycles                           #    2.831 GHz                    
-     3,412,749,114      instructions                     #    1.57  insn per cycle         
-       0.827269874 seconds time elapsed
+     2,223,817,024      cycles                           #    2.871 GHz                    
+     3,385,583,234      instructions                     #    1.52  insn per cycle         
+       0.831231377 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.754345e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.782845e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.783987e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.740756e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.765684e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.766660e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.305990 sec
+TOTAL       :     3.304935 sec
 INFO: No Floating Point Exceptions have been reported
-    10,339,047,103      cycles                           #    2.904 GHz                    
-    21,814,042,211      instructions                     #    2.11  insn per cycle         
-       3.618232079 seconds time elapsed
+    10,396,192,831      cycles                           #    2.917 GHz                    
+    23,795,713,123      instructions                     #    2.29  insn per cycle         
+       3.619511438 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,27 +100,30 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.213809e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.214258e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.214258e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.220488e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.220945e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.220945e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :    38.927029 sec
+TOTAL       :    38.865543 sec
 INFO: No Floating Point Exceptions have been reported
-   113,599,990,236      cycles                           #    2.918 GHz                    
-   144,263,471,206      instructions                     #    1.27  insn per cycle         
-      38.931468699 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:20934) (avx2:    0) (512y:    0) (512z:    0)
+   114,075,746,984      cycles                           #    2.935 GHz                    
+   144,284,837,728      instructions                     #    1.26  insn per cycle         
+      38.869913276 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:21024) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++)    = 6.626675e-04
-Avg ME (F77/C++)    = 6.6266731198140439E-004
-Relative difference = 2.8372991823632784e-07
+Avg ME (F77/C++)    = 6.6266731198140450E-004
+Relative difference = 2.83729918072716e-07
 OK (relative difference <= 5E-3)
 =========================================================================
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.980868e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.983156e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.983156e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.002635e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.004951e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.004951e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     5.507958 sec
+TOTAL       :     5.467988 sec
 INFO: No Floating Point Exceptions have been reported
-    15,296,337,393      cycles                           #    2.775 GHz                    
-    38,390,334,283      instructions                     #    2.51  insn per cycle         
-       5.512252272 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:69643) (avx2:    0) (512y:    0) (512z:    0)
+    15,296,909,197      cycles                           #    2.796 GHz                    
+    37,837,176,497      instructions                     #    2.47  insn per cycle         
+       5.472337784 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:68594) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.448639e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.462484e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.462484e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.512966e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.527080e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.527080e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.208158 sec
+TOTAL       :     2.189426 sec
 INFO: No Floating Point Exceptions have been reported
-     6,009,609,297      cycles                           #    2.717 GHz                    
-    12,934,869,074      instructions                     #    2.15  insn per cycle         
-       2.212574107 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:46091) (512y:    0) (512z:    0)
+     6,002,714,707      cycles                           #    2.737 GHz                    
+    12,921,820,063      instructions                     #    2.15  insn per cycle         
+       2.193921042 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:46048) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.589282e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.608300e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.608300e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.859047e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.878725e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.878725e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.915769 sec
+TOTAL       :     1.857736 sec
 INFO: No Floating Point Exceptions have been reported
-     5,100,469,209      cycles                           #    2.658 GHz                    
-    11,450,493,955      instructions                     #    2.24  insn per cycle         
-       1.920190216 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:40134) (512y:  219) (512z:    0)
+     5,096,589,479      cycles                           #    2.738 GHz                    
+    11,450,886,914      instructions                     #    2.25  insn per cycle         
+       1.862161811 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:40151) (512y:  219) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.250893e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.264325e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.264325e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.316370e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.329769e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.329769e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.268313 sec
+TOTAL       :     2.247849 sec
 INFO: No Floating Point Exceptions have been reported
-     3,950,507,577      cycles                           #    1.739 GHz                    
-     5,890,173,466      instructions                     #    1.49  insn per cycle         
-       2.272639544 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1947) (512y:  259) (512z:38926)
+     3,953,949,727      cycles                           #    1.756 GHz                    
+     5,894,038,279      instructions                     #    1.49  insn per cycle         
+       2.252346875 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1959) (512y:  259) (512z:38977)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
index caf859f330..90e270bc8d 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 58s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_22:54:16
+DATE: 2024-09-18_12:17:32
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.974829e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.019268e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.026437e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.485010e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.524901e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.528717e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059596e+00 +- 2.368053e+00 )  GeV^-4
-TOTAL       :     0.488083 sec
+TOTAL       :     0.493784 sec
 INFO: No Floating Point Exceptions have been reported
-     2,011,611,296      cycles                           #    2.862 GHz                    
-     3,016,167,250      instructions                     #    1.50  insn per cycle         
-       0.763948000 seconds time elapsed
+     2,047,150,406      cycles                           #    2.872 GHz                    
+     3,017,206,545      instructions                     #    1.47  insn per cycle         
+       0.769521849 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.210072e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.275111e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.278094e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.130872e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.191030e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.193752e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
-TOTAL       :     1.784736 sec
+TOTAL       :     1.801872 sec
 INFO: No Floating Point Exceptions have been reported
-     5,838,577,722      cycles                           #    2.898 GHz                    
-    12,186,585,442      instructions                     #    2.09  insn per cycle         
-       2.071487175 seconds time elapsed
+     5,918,779,581      cycles                           #    2.909 GHz                    
+    12,693,441,452      instructions                     #    2.14  insn per cycle         
+       2.093566853 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.926933e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.927895e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.927895e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.942960e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.943920e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.943920e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
-TOTAL       :     8.516057 sec
+TOTAL       :     8.446218 sec
 INFO: No Floating Point Exceptions have been reported
-    24,970,984,207      cycles                           #    2.931 GHz                    
-    79,109,755,994      instructions                     #    3.17  insn per cycle         
-       8.520078553 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2:    0) (512y:    0) (512z:    0)
+    24,891,970,806      cycles                           #    2.946 GHz                    
+    79,110,184,615      instructions                     #    3.18  insn per cycle         
+       8.450517031 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.961795e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.974179e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.974179e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.000853e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.014105e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.014105e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060119e+00 +- 2.367901e+00 )  GeV^-4
-TOTAL       :     2.360659 sec
+TOTAL       :     2.347412 sec
 INFO: No Floating Point Exceptions have been reported
-     6,513,112,442      cycles                           #    2.755 GHz                    
-    20,270,717,088      instructions                     #    3.11  insn per cycle         
-       2.364792746 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2:    0) (512y:    0) (512z:    0)
+     6,535,913,878      cycles                           #    2.780 GHz                    
+    20,270,850,285      instructions                     #    3.10  insn per cycle         
+       2.351723425 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.566079e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.572375e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.572375e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.599290e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.605892e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.605892e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     1.052140 sec
+TOTAL       :     1.030594 sec
 INFO: No Floating Point Exceptions have been reported
-     2,869,893,884      cycles                           #    2.719 GHz                    
-     7,065,821,933      instructions                     #    2.46  insn per cycle         
-       1.056261642 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12058) (512y:    0) (512z:    0)
+     2,836,963,276      cycles                           #    2.743 GHz                    
+     7,065,994,832      instructions                     #    2.49  insn per cycle         
+       1.034860296 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12055) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.792686e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.801200e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.801200e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.795295e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.803482e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.803482e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     0.919902 sec
+TOTAL       :     0.918514 sec
 INFO: No Floating Point Exceptions have been reported
-     2,517,840,272      cycles                           #    2.727 GHz                    
-     6,403,424,925      instructions                     #    2.54  insn per cycle         
-       0.923978611 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11026) (512y:   43) (512z:    0)
+     2,528,652,589      cycles                           #    2.743 GHz                    
+     6,403,959,518      instructions                     #    2.53  insn per cycle         
+       0.922696206 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11019) (512y:   44) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.398821e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.403770e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.403770e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.410082e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.415209e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.415209e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
-TOTAL       :     1.177484 sec
+TOTAL       :     1.168400 sec
 INFO: No Floating Point Exceptions have been reported
-     2,069,515,653      cycles                           #    1.752 GHz                    
-     3,303,689,754      instructions                     #    1.60  insn per cycle         
-       1.181725896 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2591) (512y:   46) (512z: 9609)
+     2,072,435,771      cycles                           #    1.768 GHz                    
+     3,304,546,208      instructions                     #    1.59  insn per cycle         
+       1.172720772 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2603) (512y:   44) (512z: 9605)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt
index 4384aa126c..c66db7ae78 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 03s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:30:44
+DATE: 2024-09-18_12:55:35
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,15 +53,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.349206e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.974366e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.974366e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.970193e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.498612e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.498612e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.048178e+00 +- 2.364571e+00 )  GeV^-4
-TOTAL       :     0.473565 sec
+TOTAL       :     0.479926 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     1,958,980,880      cycles                           #    2.856 GHz                    
-     2,988,475,752      instructions                     #    1.53  insn per cycle         
-       0.743220272 seconds time elapsed
+     2,011,379,748      cycles                           #    2.884 GHz                    
+     3,038,247,862      instructions                     #    1.51  insn per cycle         
+       0.753810180 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -83,19 +80,23 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.926520e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.073479e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.073479e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.940879e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.083233e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.083233e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.641709e+00 +- 4.994248e+00 )  GeV^-4
-TOTAL       :     1.969743 sec
+TOTAL       :     1.970426 sec
 INFO: No Floating Point Exceptions have been reported
-     6,416,487,251      cycles                           #    2.894 GHz                    
-    13,765,769,658      instructions                     #    2.15  insn per cycle         
-       2.275672841 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+     6,440,275,548      cycles                           #    2.913 GHz                    
+    13,287,281,132      instructions                     #    2.06  insn per cycle         
+       2.267959957 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -114,20 +115,24 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.920489e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.921406e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.921406e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.936854e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.937817e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.937817e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
-TOTAL       :     8.546975 sec
+TOTAL       :     8.475178 sec
 INFO: No Floating Point Exceptions have been reported
-    24,967,095,829      cycles                           #    2.920 GHz                    
-    79,114,043,176      instructions                     #    3.17  insn per cycle         
-       8.551105311 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2:    0) (512y:    0) (512z:    0)
+INFO: No Floating Point Exceptions have been reported
+    24,927,059,080      cycles                           #    2.940 GHz                    
+    79,118,119,354      instructions                     #    3.17  insn per cycle         
+       8.479535627 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,24 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.964392e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.977246e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.977246e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.002962e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.015509e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.015509e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060119e+00 +- 2.367901e+00 )  GeV^-4
-TOTAL       :     2.362536 sec
+TOTAL       :     2.349551 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     6,526,475,046      cycles                           #    2.759 GHz                    
-    20,281,153,306      instructions                     #    3.11  insn per cycle         
-       2.366796104 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2:    0) (512y:    0) (512z:    0)
+     6,544,667,804      cycles                           #    2.781 GHz                    
+    20,279,974,113      instructions                     #    3.10  insn per cycle         
+       2.353974711 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -174,20 +183,24 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.576931e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.583321e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.583321e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.603853e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.610574e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.610574e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     1.047656 sec
+TOTAL       :     1.030576 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,876,976,191      cycles                           #    2.737 GHz                    
-     7,076,241,584      instructions                     #    2.46  insn per cycle         
-       1.051948309 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12058) (512y:    0) (512z:    0)
+     2,847,456,615      cycles                           #    2.753 GHz                    
+     7,075,989,633      instructions                     #    2.49  insn per cycle         
+       1.035024707 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12055) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +217,24 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.775555e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.783879e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.783879e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.785349e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.793696e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.793696e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     0.931498 sec
+TOTAL       :     0.926691 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,525,182,211      cycles                           #    2.700 GHz                    
-     6,413,494,225      instructions                     #    2.54  insn per cycle         
-       0.936017248 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11026) (512y:   43) (512z:    0)
+     2,540,934,134      cycles                           #    2.731 GHz                    
+     6,413,438,200      instructions                     #    2.52  insn per cycle         
+       0.931148836 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11019) (512y:   44) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -234,20 +251,24 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.390162e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.395169e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.395169e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.400821e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.405962e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.405962e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
-TOTAL       :     1.187644 sec
+TOTAL       :     1.179178 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,081,030,338      cycles                           #    1.747 GHz                    
-     3,314,848,476      instructions                     #    1.59  insn per cycle         
-       1.191904774 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2591) (512y:   46) (512z: 9609)
+     2,081,047,712      cycles                           #    1.760 GHz                    
+     3,314,864,763      instructions                     #    1.59  insn per cycle         
+       1.183503546 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2603) (512y:   44) (512z: 9605)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt
index 5f6b002bb3..3aa8ed158e 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 00s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:41:06
+DATE: 2024-09-18_13:06:34
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.965237e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.008721e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.013692e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.472678e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.513045e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.517154e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.159396e-01 +- 3.238803e-01 )  GeV^-4
-TOTAL       :     0.469895 sec
+TOTAL       :     0.480872 sec
 INFO: No Floating Point Exceptions have been reported
-     1,989,054,745      cycles                           #    2.864 GHz                    
-     3,002,672,709      instructions                     #    1.51  insn per cycle         
-       0.751303855 seconds time elapsed
+     2,012,103,314      cycles                           #    2.880 GHz                    
+     2,956,061,319      instructions                     #    1.47  insn per cycle         
+       0.756135044 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.126762e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.189675e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.192592e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.032542e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.093526e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.096446e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 1.094367e+02 +- 1.071509e+02 )  GeV^-4
-TOTAL       :     1.874744 sec
+TOTAL       :     1.887302 sec
 INFO: No Floating Point Exceptions have been reported
-     6,008,746,998      cycles                           #    2.857 GHz                    
-    12,059,694,785      instructions                     #    2.01  insn per cycle         
-       2.159599467 seconds time elapsed
+     6,151,892,700      cycles                           #    2.911 GHz                    
+    12,903,540,079      instructions                     #    2.10  insn per cycle         
+       2.177167582 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.922848e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.923819e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.923819e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.942290e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.943248e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.943248e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.208459e-01 +- 3.253446e-01 )  GeV^-4
-TOTAL       :     8.535682 sec
+TOTAL       :     8.449460 sec
 INFO: No Floating Point Exceptions have been reported
-    24,979,869,216      cycles                           #    2.925 GHz                    
-    79,110,524,140      instructions                     #    3.17  insn per cycle         
-       8.539754584 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2:    0) (512y:    0) (512z:    0)
+    24,927,677,850      cycles                           #    2.949 GHz                    
+    79,113,674,015      instructions                     #    3.17  insn per cycle         
+       8.453509271 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.973837e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.986611e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.986611e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.966325e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.979405e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.979405e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.208457e-01 +- 3.253445e-01 )  GeV^-4
-TOTAL       :     2.357528 sec
+TOTAL       :     2.360667 sec
 INFO: No Floating Point Exceptions have been reported
-     6,515,014,758      cycles                           #    2.761 GHz                    
-    20,269,142,798      instructions                     #    3.11  insn per cycle         
-       2.361661819 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2:    0) (512y:    0) (512z:    0)
+     6,536,812,483      cycles                           #    2.766 GHz                    
+    20,271,244,947      instructions                     #    3.10  insn per cycle         
+       2.364721005 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.574724e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.581587e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.581587e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.594039e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.600583e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.600583e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.214978e-01 +- 3.255521e-01 )  GeV^-4
-TOTAL       :     1.047415 sec
+TOTAL       :     1.034746 sec
 INFO: No Floating Point Exceptions have been reported
-     2,863,177,251      cycles                           #    2.724 GHz                    
-     7,063,220,596      instructions                     #    2.47  insn per cycle         
-       1.051770639 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12058) (512y:    0) (512z:    0)
+     2,840,398,673      cycles                           #    2.736 GHz                    
+     7,064,163,701      instructions                     #    2.49  insn per cycle         
+       1.038926233 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12055) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.791876e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.800569e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.800569e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.789304e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.797829e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.797829e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.214978e-01 +- 3.255521e-01 )  GeV^-4
-TOTAL       :     0.921846 sec
+TOTAL       :     0.922641 sec
 INFO: No Floating Point Exceptions have been reported
-     2,521,844,440      cycles                           #    2.725 GHz                    
-     6,401,695,337      instructions                     #    2.54  insn per cycle         
-       0.925999634 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11026) (512y:   43) (512z:    0)
+     2,530,877,890      cycles                           #    2.733 GHz                    
+     6,400,607,448      instructions                     #    2.53  insn per cycle         
+       0.926747674 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11019) (512y:   44) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.393441e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.398473e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.398473e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.398241e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.403280e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.403280e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.214981e-01 +- 3.255523e-01 )  GeV^-4
-TOTAL       :     1.183808 sec
+TOTAL       :     1.179849 sec
 INFO: No Floating Point Exceptions have been reported
-     2,075,399,533      cycles                           #    1.749 GHz                    
-     3,302,834,379      instructions                     #    1.59  insn per cycle         
-       1.187854862 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2591) (512y:   46) (512z: 9609)
+     2,072,557,863      cycles                           #    1.752 GHz                    
+     3,302,114,927      instructions                     #    1.59  insn per cycle         
+       1.183970001 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2603) (512y:   44) (512z: 9605)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt
index 5a9fcaa215..383503bdc9 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 01s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:38:16
+DATE: 2024-09-18_13:03:42
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.005321e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.051255e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.056194e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.456336e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.498999e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.503203e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059596e+00 +- 2.368053e+00 )  GeV^-4
-TOTAL       :     0.469314 sec
+TOTAL       :     0.480678 sec
 INFO: No Floating Point Exceptions have been reported
-     1,943,160,703      cycles                           #    2.856 GHz                    
-     2,972,314,757      instructions                     #    1.53  insn per cycle         
-       0.737370242 seconds time elapsed
+     1,979,230,420      cycles                           #    2.841 GHz                    
+     2,971,475,191      instructions                     #    1.50  insn per cycle         
+       0.754740237 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.174738e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.237805e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.240772e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.127471e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.189319e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.192138e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
-TOTAL       :     1.820268 sec
+TOTAL       :     1.829247 sec
 INFO: No Floating Point Exceptions have been reported
-     5,892,605,459      cycles                           #    2.879 GHz                    
-    11,953,174,508      instructions                     #    2.03  insn per cycle         
-       2.103372158 seconds time elapsed
+     6,015,541,951      cycles                           #    2.921 GHz                    
+    13,102,927,362      instructions                     #    2.18  insn per cycle         
+       2.117747585 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.891389e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.892289e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.892289e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.933929e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.934853e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.934853e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
-TOTAL       :     8.675984 sec
+TOTAL       :     8.485517 sec
 INFO: No Floating Point Exceptions have been reported
-    24,971,528,629      cycles                           #    2.877 GHz                    
-    79,109,972,373      instructions                     #    3.17  insn per cycle         
-       8.679991949 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2:    0) (512y:    0) (512z:    0)
+    24,920,337,024      cycles                           #    2.936 GHz                    
+    79,110,004,323      instructions                     #    3.17  insn per cycle         
+       8.489672562 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.872830e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.885581e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.885581e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.908881e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.921968e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.921968e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060119e+00 +- 2.367901e+00 )  GeV^-4
-TOTAL       :     2.390976 sec
+TOTAL       :     2.378636 sec
 INFO: No Floating Point Exceptions have been reported
-     6,521,637,162      cycles                           #    2.724 GHz                    
-    20,271,182,025      instructions                     #    3.11  insn per cycle         
-       2.395096620 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2:    0) (512y:    0) (512z:    0)
+     6,534,965,408      cycles                           #    2.745 GHz                    
+    20,270,944,694      instructions                     #    3.10  insn per cycle         
+       2.382810423 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.530401e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.536759e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.536759e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.596709e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.603236e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.603236e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     1.076707 sec
+TOTAL       :     1.032159 sec
 INFO: No Floating Point Exceptions have been reported
-     2,865,147,176      cycles                           #    2.656 GHz                    
-     7,066,009,717      instructions                     #    2.47  insn per cycle         
-       1.083434992 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12058) (512y:    0) (512z:    0)
+     2,835,702,890      cycles                           #    2.738 GHz                    
+     7,066,012,611      instructions                     #    2.49  insn per cycle         
+       1.036273546 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12055) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.763488e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.771736e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.771736e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.794868e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.803140e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.803140e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     0.934798 sec
+TOTAL       :     0.918666 sec
 INFO: No Floating Point Exceptions have been reported
-     2,518,912,552      cycles                           #    2.685 GHz                    
-     6,403,623,689      instructions                     #    2.54  insn per cycle         
-       0.938856790 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11026) (512y:   43) (512z:    0)
+     2,525,424,797      cycles                           #    2.739 GHz                    
+     6,403,502,842      instructions                     #    2.54  insn per cycle         
+       0.922861156 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11019) (512y:   44) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.394629e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.399612e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.399612e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.410052e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.415143e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.415143e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
-TOTAL       :     1.181219 sec
+TOTAL       :     1.168246 sec
 INFO: No Floating Point Exceptions have been reported
-     2,069,401,084      cycles                           #    1.747 GHz                    
-     3,303,621,434      instructions                     #    1.60  insn per cycle         
-       1.185349026 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2591) (512y:   46) (512z: 9609)
+     2,068,179,462      cycles                           #    1.765 GHz                    
+     3,303,875,484      instructions                     #    1.60  insn per cycle         
+       1.172542272 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2603) (512y:   44) (512z: 9605)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt
index 4931fb2466..7797c46a19 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 00s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:35:30
+DATE: 2024-09-18_13:00:55
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -54,15 +50,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.452614e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.032417e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.037434e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.992477e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.494287e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.498231e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.048178e+00 +- 2.364571e+00 )  GeV^-4
-TOTAL       :     0.470442 sec
+TOTAL       :     0.481963 sec
 INFO: No Floating Point Exceptions have been reported
-     1,990,742,039      cycles                           #    2.864 GHz                    
-     2,996,109,900      instructions                     #    1.51  insn per cycle         
-       0.751854703 seconds time elapsed
+     1,997,965,324      cycles                           #    2.853 GHz                    
+     2,939,834,102      instructions                     #    1.47  insn per cycle         
+       0.757193064 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
@@ -74,19 +70,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.165289e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.245358e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.248369e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.118190e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.192549e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.195361e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.641709e+00 +- 4.994248e+00 )  GeV^-4
-TOTAL       :     1.898543 sec
+TOTAL       :     1.905617 sec
 INFO: No Floating Point Exceptions have been reported
-     6,178,614,639      cycles                           #    2.905 GHz                    
-    13,438,267,033      instructions                     #    2.17  insn per cycle         
-       2.185826437 seconds time elapsed
+     6,204,653,970      cycles                           #    2.904 GHz                    
+    11,932,036,366      instructions                     #    1.92  insn per cycle         
+       2.194579719 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -104,20 +103,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.928370e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.929306e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.929306e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.934899e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.935850e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.935850e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
-TOTAL       :     8.509778 sec
+TOTAL       :     8.481229 sec
 INFO: No Floating Point Exceptions have been reported
-    24,989,263,511      cycles                           #    2.936 GHz                    
-    79,114,106,677      instructions                     #    3.17  insn per cycle         
-       8.513783620 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2:    0) (512y:    0) (512z:    0)
+    24,933,908,474      cycles                           #    2.939 GHz                    
+    79,109,779,876      instructions                     #    3.17  insn per cycle         
+       8.485474778 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -133,20 +135,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.954225e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.966789e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.966789e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.954399e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.967143e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.967143e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060119e+00 +- 2.367901e+00 )  GeV^-4
-TOTAL       :     2.363221 sec
+TOTAL       :     2.363281 sec
 INFO: No Floating Point Exceptions have been reported
-     6,520,268,594      cycles                           #    2.755 GHz                    
-    20,272,411,862      instructions                     #    3.11  insn per cycle         
-       2.367321549 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2:    0) (512y:    0) (512z:    0)
+     6,535,222,026      cycles                           #    2.761 GHz                    
+    20,271,091,445      instructions                     #    3.10  insn per cycle         
+       2.367564480 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -162,20 +167,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.578872e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.585242e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.585242e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.592187e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.598658e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.598658e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     1.043675 sec
+TOTAL       :     1.035113 sec
 INFO: No Floating Point Exceptions have been reported
-     2,864,987,448      cycles                           #    2.736 GHz                    
-     7,066,433,957      instructions                     #    2.47  insn per cycle         
-       1.047699347 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12058) (512y:    0) (512z:    0)
+     2,837,322,925      cycles                           #    2.732 GHz                    
+     7,065,851,947      instructions                     #    2.49  insn per cycle         
+       1.039614272 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12055) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -191,20 +199,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.792624e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.800903e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.800903e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.786472e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.794657e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.794657e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     0.919863 sec
+TOTAL       :     0.923192 sec
 INFO: No Floating Point Exceptions have been reported
-     2,518,456,002      cycles                           #    2.728 GHz                    
-     6,403,348,870      instructions                     #    2.54  insn per cycle         
-       0.924030112 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11026) (512y:   43) (512z:    0)
+     2,528,197,649      cycles                           #    2.730 GHz                    
+     6,403,497,083      instructions                     #    2.53  insn per cycle         
+       0.927414591 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11019) (512y:   44) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -220,20 +231,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.396708e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.401761e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.401761e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.394144e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.399234e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.399234e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
-TOTAL       :     1.179267 sec
+TOTAL       :     1.181792 sec
 INFO: No Floating Point Exceptions have been reported
-     2,067,882,758      cycles                           #    1.748 GHz                    
-     3,303,840,347      instructions                     #    1.60  insn per cycle         
-       1.183653607 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2591) (512y:   46) (512z: 9609)
+     2,068,985,618      cycles                           #    1.745 GHz                    
+     3,303,850,767      instructions                     #    1.60  insn per cycle         
+       1.186123644 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2603) (512y:   44) (512z: 9605)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt
index 53da48f73b..9b731718b7 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 54s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_22:54:42
+DATE: 2024-09-18_12:17:58
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.983038e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.030651e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.035775e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.454590e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.492804e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.497193e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059596e+00 +- 2.368053e+00 )  GeV^-4
-TOTAL       :     0.485658 sec
+TOTAL       :     0.495704 sec
 INFO: No Floating Point Exceptions have been reported
-     2,018,364,855      cycles                           #    2.868 GHz                    
-     3,042,795,140      instructions                     #    1.51  insn per cycle         
-       0.761253338 seconds time elapsed
+     2,032,995,153      cycles                           #    2.848 GHz                    
+     2,991,224,667      instructions                     #    1.47  insn per cycle         
+       0.774166376 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.194816e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.259321e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.262341e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.094149e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.154905e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.157610e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
-TOTAL       :     1.788438 sec
+TOTAL       :     1.805319 sec
 INFO: No Floating Point Exceptions have been reported
-     5,850,668,315      cycles                           #    2.899 GHz                    
-    12,627,354,962      instructions                     #    2.16  insn per cycle         
-       2.076625668 seconds time elapsed
+     5,914,324,613      cycles                           #    2.902 GHz                    
+    11,873,756,543      instructions                     #    2.01  insn per cycle         
+       2.096430893 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.930772e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.931709e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.931709e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.929536e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.930480e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.930480e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060121e+00 +- 2.367902e+00 )  GeV^-4
-TOTAL       :     8.499103 sec
+TOTAL       :     8.504728 sec
 INFO: No Floating Point Exceptions have been reported
-    24,882,769,420      cycles                           #    2.927 GHz                    
-    78,843,195,491      instructions                     #    3.17  insn per cycle         
-       8.503048390 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2:    0) (512y:    0) (512z:    0)
+    25,015,654,943      cycles                           #    2.941 GHz                    
+    78,847,702,433      instructions                     #    3.15  insn per cycle         
+       8.508857223 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.188799e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.202194e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.202194e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.178831e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.192718e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.192718e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060119e+00 +- 2.367901e+00 )  GeV^-4
-TOTAL       :     2.286108 sec
+TOTAL       :     2.289338 sec
 INFO: No Floating Point Exceptions have been reported
-     6,462,798,604      cycles                           #    2.823 GHz                    
-    20,230,881,275      instructions                     #    3.13  insn per cycle         
-       2.290393769 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13497) (avx2:    0) (512y:    0) (512z:    0)
+     6,463,318,702      cycles                           #    2.819 GHz                    
+    20,229,880,790      instructions                     #    3.13  insn per cycle         
+       2.293529801 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13491) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.515511e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.521438e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.521438e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.520587e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.526569e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.526569e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     1.086872 sec
+TOTAL       :     1.083432 sec
 INFO: No Floating Point Exceptions have been reported
-     2,971,860,963      cycles                           #    2.725 GHz                    
-     7,206,762,027      instructions                     #    2.42  insn per cycle         
-       1.091113117 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12440) (512y:    0) (512z:    0)
+     2,984,403,957      cycles                           #    2.746 GHz                    
+     7,207,167,499      instructions                     #    2.41  insn per cycle         
+       1.087697042 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:12437) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.734369e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.742330e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.742330e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.733677e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.741677e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.741677e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060560e+00 +- 2.367611e+00 )  GeV^-4
-TOTAL       :     0.950268 sec
+TOTAL       :     0.950819 sec
 INFO: No Floating Point Exceptions have been reported
-     2,601,308,056      cycles                           #    2.727 GHz                    
-     6,544,650,602      instructions                     #    2.52  insn per cycle         
-       0.954512299 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11454) (512y:   26) (512z:    0)
+     2,611,989,316      cycles                           #    2.737 GHz                    
+     6,545,448,351      instructions                     #    2.51  insn per cycle         
+       0.954971597 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11449) (512y:   27) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.341554e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.346314e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.346314e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.366907e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.371833e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.371833e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060562e+00 +- 2.367612e+00 )  GeV^-4
-TOTAL       :     1.227523 sec
+TOTAL       :     1.204851 sec
 INFO: No Floating Point Exceptions have been reported
-     2,143,780,946      cycles                           #    1.742 GHz                    
-     3,461,620,552      instructions                     #    1.61  insn per cycle         
-       1.231794178 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3027) (512y:   25) (512z: 9681)
+     2,138,789,905      cycles                           #    1.770 GHz                    
+     3,461,611,954      instructions                     #    1.62  insn per cycle         
+       1.209183599 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3037) (512y:   25) (512z: 9677)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt
index e4e84be90e..2cbba9e698 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 02m 26s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:22:55
+DATE: 2024-09-18_12:47:39
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.043184e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.091633e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.097432e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.579593e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.616784e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.620542e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059597e+00 +- 2.368053e+00 )  GeV^-4
-TOTAL       :     0.492322 sec
+TOTAL       :     0.508462 sec
 INFO: No Floating Point Exceptions have been reported
-     2,017,019,452      cycles                           #    2.853 GHz                    
-     3,009,481,944      instructions                     #    1.49  insn per cycle         
-       0.767952594 seconds time elapsed
+     2,050,083,222      cycles                           #    2.848 GHz                    
+     2,995,129,166      instructions                     #    1.46  insn per cycle         
+       0.787145254 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.652929e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.728860e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.732304e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.605413e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.675177e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.678183e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
-TOTAL       :     1.733097 sec
+TOTAL       :     1.737190 sec
 INFO: No Floating Point Exceptions have been reported
-     5,734,163,299      cycles                           #    2.902 GHz                    
-    12,183,112,948      instructions                     #    2.12  insn per cycle         
-       2.035084293 seconds time elapsed
+     5,761,752,713      cycles                           #    2.921 GHz                    
+    12,131,218,179      instructions                     #    2.11  insn per cycle         
+       2.028782459 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,27 +100,30 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.606445e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.607261e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.607261e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.602317e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.603102e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.603102e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059969e+00 +- 2.367799e+00 )  GeV^-4
-TOTAL       :    29.257397 sec
+TOTAL       :    29.279245 sec
 INFO: No Floating Point Exceptions have been reported
-    85,581,367,468      cycles                           #    2.925 GHz                    
-   135,289,170,958      instructions                     #    1.58  insn per cycle         
-      29.261520448 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:15198) (avx2:    0) (512y:    0) (512z:    0)
+    85,920,999,170      cycles                           #    2.934 GHz                    
+   135,650,935,446      instructions                     #    1.58  insn per cycle         
+      29.283501695 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:15856) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++)    = 6.627535e-04
-Avg ME (F77/C++)    = 6.6275351218394313E-004
-Relative difference = 1.8383823081355348e-08
+Avg ME (F77/C++)    = 6.6275349717465765E-004
+Relative difference = 4.26303654465793e-09
 OK (relative difference <= 5E-3)
 =========================================================================
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP=
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.800616e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.812635e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.812635e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.859267e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.871489e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.871489e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059962e+00 +- 2.367792e+00 )  GeV^-4
-TOTAL       :     2.416640 sec
+TOTAL       :     2.395975 sec
 INFO: No Floating Point Exceptions have been reported
-     6,748,661,340      cycles                           #    2.789 GHz                    
-    19,356,380,273      instructions                     #    2.87  insn per cycle         
-       2.420837675 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:69590) (avx2:    0) (512y:    0) (512z:    0)
+     6,767,487,912      cycles                           #    2.821 GHz                    
+    19,352,953,840      instructions                     #    2.86  insn per cycle         
+       2.400276342 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:69577) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.426073e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.431391e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.431391e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.427993e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.433168e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.433168e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060903e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     1.155116 sec
+TOTAL       :     1.153582 sec
 INFO: No Floating Point Exceptions have been reported
-     3,167,666,280      cycles                           #    2.734 GHz                    
-     6,792,057,786      instructions                     #    2.14  insn per cycle         
-       1.159384177 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:48998) (512y:    0) (512z:    0)
+     3,172,176,609      cycles                           #    2.741 GHz                    
+     6,794,912,676      instructions                     #    2.14  insn per cycle         
+       1.157865028 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:49034) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.723429e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.731571e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.731571e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.725737e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.733579e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.733579e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060903e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     0.956806 sec
+TOTAL       :     0.955483 sec
 INFO: No Floating Point Exceptions have been reported
-     2,628,431,994      cycles                           #    2.738 GHz                    
-     5,970,160,001      instructions                     #    2.27  insn per cycle         
-       0.961004875 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:42589) (512y:   11) (512z:    0)
+     2,630,257,808      cycles                           #    2.742 GHz                    
+     5,970,030,267      instructions                     #    2.27  insn per cycle         
+       0.959792623 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:42602) (512y:   11) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.388958e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.393857e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.393857e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.398705e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.403700e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.403700e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060905e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     1.186044 sec
+TOTAL       :     1.177833 sec
 INFO: No Floating Point Exceptions have been reported
-     2,074,683,359      cycles                           #    1.744 GHz                    
-     3,493,764,691      instructions                     #    1.68  insn per cycle         
-       1.190513991 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5186) (512y:    3) (512z:44834)
+     2,074,489,030      cycles                           #    1.756 GHz                    
+     3,495,482,745      instructions                     #    1.68  insn per cycle         
+       1.182176144 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 5208) (512y:    3) (512z:44858)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt
index 4a7fc1519c..307c9cbde7 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 02m 18s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_23:23:45
+DATE: 2024-09-18_12:48:29
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.126041e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.174718e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.179833e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.556326e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.594247e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.598112e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059597e+00 +- 2.368053e+00 )  GeV^-4
-TOTAL       :     0.489322 sec
+TOTAL       :     0.495550 sec
 INFO: No Floating Point Exceptions have been reported
-     2,017,513,819      cycles                           #    2.863 GHz                    
-     2,950,795,242      instructions                     #    1.46  insn per cycle         
-       0.765967894 seconds time elapsed
+     2,046,506,588      cycles                           #    2.866 GHz                    
+     3,036,453,126      instructions                     #    1.48  insn per cycle         
+       0.773976715 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.715485e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.792937e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.796402e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.676205e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.747820e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.750770e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.664703e+00 +- 5.072736e+00 )  GeV^-4
-TOTAL       :     1.717851 sec
+TOTAL       :     1.731569 sec
 INFO: No Floating Point Exceptions have been reported
-     5,615,997,179      cycles                           #    2.878 GHz                    
-    11,675,413,905      instructions                     #    2.08  insn per cycle         
-       2.007827364 seconds time elapsed
+     5,750,101,661      cycles                           #    2.911 GHz                    
+    12,015,194,090      instructions                     #    2.09  insn per cycle         
+       2.032327922 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,27 +100,30 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.589879e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.590664e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.590664e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.582687e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.583472e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.583472e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059969e+00 +- 2.367799e+00 )  GeV^-4
-TOTAL       :    29.343679 sec
+TOTAL       :    29.381578 sec
 INFO: No Floating Point Exceptions have been reported
-    85,677,631,333      cycles                           #    2.920 GHz                    
-   135,714,984,488      instructions                     #    1.58  insn per cycle         
-      29.347732882 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:15490) (avx2:    0) (512y:    0) (512z:    0)
+    86,090,574,106      cycles                           #    2.930 GHz                    
+   135,364,281,032      instructions                     #    1.57  insn per cycle         
+      29.385785407 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:15471) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++)    = 6.627535e-04
-Avg ME (F77/C++)    = 6.6275349723624727E-004
-Relative difference = 4.170106635889315e-09
+Avg ME (F77/C++)    = 6.6275349662128086E-004
+Relative difference = 5.098002770919431e-09
 OK (relative difference <= 5E-3)
 =========================================================================
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.717186e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.728871e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.728871e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.781191e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.793019e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.793019e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.059962e+00 +- 2.367792e+00 )  GeV^-4
-TOTAL       :     2.446420 sec
+TOTAL       :     2.423420 sec
 INFO: No Floating Point Exceptions have been reported
-     6,827,807,387      cycles                           #    2.787 GHz                    
-    19,406,848,342      instructions                     #    2.84  insn per cycle         
-       2.450574027 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:69621) (avx2:    0) (512y:    0) (512z:    0)
+     6,852,713,563      cycles                           #    2.824 GHz                    
+    19,471,819,479      instructions                     #    2.84  insn per cycle         
+       2.427762808 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:69876) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.447940e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.453395e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.453395e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.462291e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.467817e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.467817e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060903e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     1.137607 sec
+TOTAL       :     1.126460 sec
 INFO: No Floating Point Exceptions have been reported
-     3,109,396,020      cycles                           #    2.725 GHz                    
-     6,715,608,220      instructions                     #    2.16  insn per cycle         
-       1.141840390 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:47685) (512y:    0) (512z:    0)
+     3,104,466,483      cycles                           #    2.747 GHz                    
+     6,715,454,919      instructions                     #    2.16  insn per cycle         
+       1.130606631 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:47692) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.708140e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.715722e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.715722e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.731919e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.740037e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.740037e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060903e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     0.965076 sec
+TOTAL       :     0.951895 sec
 INFO: No Floating Point Exceptions have been reported
-     2,632,093,230      cycles                           #    2.718 GHz                    
-     5,969,809,941      instructions                     #    2.27  insn per cycle         
-       0.969318362 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:41870) (512y:   13) (512z:    0)
+     2,625,337,295      cycles                           #    2.748 GHz                    
+     5,966,178,470      instructions                     #    2.27  insn per cycle         
+       0.956115789 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:41858) (512y:   13) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.389945e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.395095e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.395095e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.400560e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.405624e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.405624e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 4.060905e+00 +- 2.367377e+00 )  GeV^-4
-TOTAL       :     1.185082 sec
+TOTAL       :     1.176100 sec
 INFO: No Floating Point Exceptions have been reported
-     2,071,495,587      cycles                           #    1.743 GHz                    
-     3,487,122,412      instructions                     #    1.68  insn per cycle         
-       1.189465321 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4150) (512y:    4) (512z:44485)
+     2,074,048,907      cycles                           #    1.758 GHz                    
+     3,487,720,369      instructions                     #    1.68  insn per cycle         
+       1.180409639 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4171) (512y:    4) (512z:44494)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
index 5f8d97cb52..9378c125b2 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 00s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_22:55:08
+DATE: 2024-09-18_12:18:25
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.467119e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.493597e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.495917e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.318122e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.344688e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.346795e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.527200 sec
+TOTAL       :     0.537854 sec
 INFO: No Floating Point Exceptions have been reported
-     2,179,141,526      cycles                           #    2.863 GHz                    
-     3,401,942,513      instructions                     #    1.56  insn per cycle         
-       0.820608598 seconds time elapsed
+     2,221,101,860      cycles                           #    2.870 GHz                    
+     3,456,789,338      instructions                     #    1.56  insn per cycle         
+       0.830636964 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.126981e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.159542e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.160911e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.135476e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.165199e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.166400e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.036741 sec
+TOTAL       :     3.042010 sec
 INFO: No Floating Point Exceptions have been reported
-     9,557,865,433      cycles                           #    2.900 GHz                    
-    21,892,950,666      instructions                     #    2.29  insn per cycle         
-       3.354787545 seconds time elapsed
+     9,635,962,932      cycles                           #    2.918 GHz                    
+    21,731,646,939      instructions                     #    2.26  insn per cycle         
+       3.358385171 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.846810e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.847689e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.847689e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.865433e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.866327e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.866327e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.886925 sec
+TOTAL       :     8.798260 sec
 INFO: No Floating Point Exceptions have been reported
-    25,956,193,105      cycles                           #    2.920 GHz                    
-    79,426,766,928      instructions                     #    3.06  insn per cycle         
-       8.891107748 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 4776) (avx2:    0) (512y:    0) (512z:    0)
+    25,923,427,719      cycles                           #    2.945 GHz                    
+    79,426,669,152      instructions                     #    3.06  insn per cycle         
+       8.802604907 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 4775) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.498157e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.501294e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.501294e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.509753e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.512944e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.512944e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.694558 sec
+TOTAL       :     4.679543 sec
 INFO: No Floating Point Exceptions have been reported
-    12,815,310,250      cycles                           #    2.728 GHz                    
-    38,825,821,027      instructions                     #    3.03  insn per cycle         
-       4.698934635 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:13172) (avx2:    0) (512y:    0) (512z:    0)
+    12,835,987,651      cycles                           #    2.741 GHz                    
+    38,823,362,502      instructions                     #    3.02  insn per cycle         
+       4.683930656 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:13173) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.030362e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.046971e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.046971e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.042437e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.059866e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.059866e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.048463 sec
+TOTAL       :     2.045674 sec
 INFO: No Floating Point Exceptions have been reported
-     5,583,919,538      cycles                           #    2.721 GHz                    
-    13,616,921,209      instructions                     #    2.44  insn per cycle         
-       2.052769914 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11415) (512y:    0) (512z:    0)
+     5,599,505,022      cycles                           #    2.733 GHz                    
+    13,616,194,882      instructions                     #    2.43  insn per cycle         
+       2.050016410 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11427) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.166890e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.188415e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.188415e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.300992e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.323362e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.323362e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.795655 sec
+TOTAL       :     1.769579 sec
 INFO: No Floating Point Exceptions have been reported
-     4,900,555,717      cycles                           #    2.724 GHz                    
-    12,295,343,674      instructions                     #    2.51  insn per cycle         
-       1.799955144 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10319) (512y:   79) (512z:    0)
+     4,864,538,423      cycles                           #    2.743 GHz                    
+    12,294,521,282      instructions                     #    2.53  insn per cycle         
+       1.774039102 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10331) (512y:   80) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.882736e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.894897e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.894897e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.972443e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.984642e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.984642e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.389543 sec
+TOTAL       :     2.358911 sec
 INFO: No Floating Point Exceptions have been reported
-     4,173,275,375      cycles                           #    1.744 GHz                    
-     6,390,919,884      instructions                     #    1.53  insn per cycle         
-       2.394037924 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1957) (512y:   93) (512z: 9359)
+     4,168,866,472      cycles                           #    1.765 GHz                    
+     6,393,098,618      instructions                     #    1.53  insn per cycle         
+       2.363390601 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1983) (512y:   92) (512z: 9360)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt
index c03b5276d2..032ee51884 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 55s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
 
-DATE: 2024-08-29_22:55:41
+DATE: 2024-09-18_12:18:59
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.484238e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.511264e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.513583e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.323949e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.349755e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.352036e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     0.526008 sec
+TOTAL       :     0.534628 sec
 INFO: No Floating Point Exceptions have been reported
-     2,165,601,272      cycles                           #    2.860 GHz                    
-     3,419,597,650      instructions                     #    1.58  insn per cycle         
-       0.816374932 seconds time elapsed
+     2,204,767,059      cycles                           #    2.871 GHz                    
+     3,455,052,131      instructions                     #    1.57  insn per cycle         
+       0.826431777 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.140598e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.173249e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.174631e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.145238e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.175049e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.176235e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 6.665112e+00 +- 5.002651e+00 )  GeV^-4
-TOTAL       :     3.029444 sec
+TOTAL       :     3.038517 sec
 INFO: No Floating Point Exceptions have been reported
-     9,525,068,079      cycles                           #    2.901 GHz                    
-    21,450,197,973      instructions                     #    2.25  insn per cycle         
-       3.341128233 seconds time elapsed
+     9,654,182,964      cycles                           #    2.928 GHz                    
+    20,172,707,879      instructions                     #    2.09  insn per cycle         
+       3.353606693 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.846603e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.847484e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.847484e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.861444e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.862342e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.862342e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     8.888642 sec
+TOTAL       :     8.816790 sec
 INFO: No Floating Point Exceptions have been reported
-    25,965,403,941      cycles                           #    2.920 GHz                    
-    79,452,311,631      instructions                     #    3.06  insn per cycle         
-       8.892834624 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 4432) (avx2:    0) (512y:    0) (512z:    0)
+    25,987,801,849      cycles                           #    2.947 GHz                    
+    79,452,087,213      instructions                     #    3.06  insn per cycle         
+       8.821027518 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 4431) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.472255e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.475382e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.475382e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.513306e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.516455e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.516455e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     4.729321 sec
+TOTAL       :     4.674356 sec
 INFO: No Floating Point Exceptions have been reported
-    12,829,809,001      cycles                           #    2.711 GHz                    
-    38,779,302,159      instructions                     #    3.02  insn per cycle         
-       4.733575001 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:12934) (avx2:    0) (512y:    0) (512z:    0)
+    12,813,296,665      cycles                           #    2.739 GHz                    
+    38,778,823,155      instructions                     #    3.03  insn per cycle         
+       4.678665662 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:12935) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.011768e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.027837e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.027837e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.042911e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.058963e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.058963e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.053124 sec
+TOTAL       :     2.045213 sec
 INFO: No Floating Point Exceptions have been reported
-     5,584,905,205      cycles                           #    2.716 GHz                    
-    13,731,123,403      instructions                     #    2.46  insn per cycle         
-       2.057449536 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11498) (512y:    0) (512z:    0)
+     5,589,546,199      cycles                           #    2.728 GHz                    
+    13,732,854,665      instructions                     #    2.46  insn per cycle         
+       2.049788655 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:11510) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 9.018510e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.039054e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.039054e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.106583e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.127720e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.127720e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     1.824760 sec
+TOTAL       :     1.807350 sec
 INFO: No Floating Point Exceptions have been reported
-     4,960,617,330      cycles                           #    2.713 GHz                    
-    12,424,075,646      instructions                     #    2.50  insn per cycle         
-       1.829110632 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10310) (512y:  239) (512z:    0)
+     4,955,573,408      cycles                           #    2.736 GHz                    
+    12,423,027,135      instructions                     #    2.51  insn per cycle         
+       1.811880023 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:10322) (512y:  240) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.840487e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.852650e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.852650e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.875797e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.888202e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.888202e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 4.063123e+00 +- 2.368970e+00 )  GeV^-4
-TOTAL       :     2.403874 sec
+TOTAL       :     2.391557 sec
 INFO: No Floating Point Exceptions have been reported
-     4,178,528,364      cycles                           #    1.736 GHz                    
-     6,494,444,056      instructions                     #    1.55  insn per cycle         
-       2.408217411 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1780) (512y:  191) (512z: 9368)
+     4,183,217,410      cycles                           #    1.747 GHz                    
+     6,495,987,121      instructions                     #    1.55  insn per cycle         
+       2.396041838 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1806) (512y:  190) (512z: 9358)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
index 8fb9ffcb63..7ab313debd 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 14m 20s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -13,38 +9,19 @@ HASCURAND=hasCurand
 HASHIPRAND=hasNoHiprand
 Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
 make: Nothing to be done for 'gtestlibs'.
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-DATE: 2024-08-29_22:57:33
+DATE: 2024-09-18_12:20:52
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +30,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.074025e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.074408e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.074613e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.053996e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.054389e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.054544e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     2.427313 sec
+TOTAL       :     2.439529 sec
 INFO: No Floating Point Exceptions have been reported
-     8,007,770,360      cycles                           #    2.905 GHz                    
-    17,844,373,075      instructions                     #    2.23  insn per cycle         
-       2.813382822 seconds time elapsed
+     8,096,284,346      cycles                           #    2.927 GHz                    
+    17,063,420,790      instructions                     #    2.11  insn per cycle         
+       2.826206150 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +48,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.214624e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.216736e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.217011e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.238045e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.240055e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.240313e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     4.008082 sec
+TOTAL       :     4.011196 sec
 INFO: No Floating Point Exceptions have been reported
-    12,658,170,825      cycles                           #    2.916 GHz                    
-    27,773,386,314      instructions                     #    2.19  insn per cycle         
-       4.398692801 seconds time elapsed
+    12,704,613,289      cycles                           #    2.925 GHz                    
+    30,115,204,727      instructions                     #    2.37  insn per cycle         
+       4.397191434 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +81,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.557310e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.557536e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.557536e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.572616e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.572823e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.572823e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     6.987063 sec
+TOTAL       :     6.975845 sec
 INFO: No Floating Point Exceptions have been reported
-    18,965,145,224      cycles                           #    2.713 GHz                    
-    53,902,719,260      instructions                     #    2.84  insn per cycle         
-       6.991136344 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:32425) (avx2:    0) (512y:    0) (512z:    0)
+    19,035,417,803      cycles                           #    2.728 GHz                    
+    53,904,235,908      instructions                     #    2.83  insn per cycle         
+       6.980238056 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +113,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.549425e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.549510e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.549510e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.590030e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.590126e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.590126e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     3.409407 sec
+TOTAL       :     3.323749 sec
 INFO: No Floating Point Exceptions have been reported
-     9,942,874,655      cycles                           #    2.914 GHz                    
-    27,152,194,685      instructions                     #    2.73  insn per cycle         
-       3.413395307 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:96499) (avx2:    0) (512y:    0) (512z:    0)
+     9,780,563,101      cycles                           #    2.940 GHz                    
+    27,151,089,688      instructions                     #    2.78  insn per cycle         
+       3.328023666 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +145,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.265590e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.265969e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.265969e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.385331e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.385742e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.385742e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.617957 sec
+TOTAL       :     1.561733 sec
 INFO: No Floating Point Exceptions have been reported
-     4,341,974,803      cycles                           #    2.678 GHz                    
-     9,590,139,782      instructions                     #    2.21  insn per cycle         
-       1.622179116 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84971) (512y:    0) (512z:    0)
+     4,266,182,969      cycles                           #    2.725 GHz                    
+     9,590,975,871      instructions                     #    2.25  insn per cycle         
+       1.566018474 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84961) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +177,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.848619e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.849166e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.849166e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.892057e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.892635e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.892635e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.373871 sec
+TOTAL       :     1.358913 sec
 INFO: No Floating Point Exceptions have been reported
-     3,731,717,521      cycles                           #    2.710 GHz                    
-     8,514,052,827      instructions                     #    2.28  insn per cycle         
-       1.377919646 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80619) (512y:   89) (512z:    0)
+     3,729,263,843      cycles                           #    2.737 GHz                    
+     8,515,569,817      instructions                     #    2.28  insn per cycle         
+       1.363199183 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80609) (512y:   90) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +209,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.389946e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.390460e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.390460e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.395803e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.396338e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.396338e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.559836 sec
+TOTAL       :     1.556955 sec
 INFO: No Floating Point Exceptions have been reported
-     2,691,203,106      cycles                           #    1.722 GHz                    
-     4,280,751,304      instructions                     #    1.59  insn per cycle         
-       1.564007681 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2852) (512y:  103) (512z:79119)
+     2,698,860,839      cycles                           #    1.729 GHz                    
+     4,282,343,065      instructions                     #    1.59  insn per cycle         
+       1.561500058 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2856) (512y:  102) (512z:79114)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt
index ceb15640ef..5983376983 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 53s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -13,38 +9,19 @@ HASCURAND=hasCurand
 HASHIPRAND=hasNoHiprand
 Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
 make: Nothing to be done for 'gtestlibs'.
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-DATE: 2024-08-29_23:31:10
+DATE: 2024-09-18_12:56:01
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,15 +34,16 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.070150e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.071117e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.071117e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.052616e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.054381e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.054381e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     2.392528 sec
+TOTAL       :     2.389054 sec
 INFO: No Floating Point Exceptions have been reported
-     7,866,237,206      cycles                           #    2.901 GHz                    
-    17,312,381,597      instructions                     #    2.20  insn per cycle         
-       2.769857787 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+     7,904,157,038      cycles                           #    2.912 GHz                    
+    16,771,352,323      instructions                     #    2.12  insn per cycle         
+       2.770325050 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -83,19 +61,23 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.184574e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.218458e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.218458e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.237194e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.272545e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.272545e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     3.992587 sec
+TOTAL       :     3.987988 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    12,563,689,959      cycles                           #    2.909 GHz                    
-    27,497,425,735      instructions                     #    2.19  insn per cycle         
-       4.373540406 seconds time elapsed
+    12,604,681,487      cycles                           #    2.919 GHz                    
+    28,965,849,382      instructions                     #    2.30  insn per cycle         
+       4.373962640 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -114,20 +96,24 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.140589e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.140841e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.140841e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.613542e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.613748e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.613748e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     6.489238 sec
+TOTAL       :     6.936083 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    18,905,197,683      cycles                           #    2.912 GHz                    
-    53,904,164,076      instructions                     #    2.85  insn per cycle         
-       6.493192099 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:32425) (avx2:    0) (512y:    0) (512z:    0)
+    18,900,515,421      cycles                           #    2.724 GHz                    
+    53,905,451,035      instructions                     #    2.85  insn per cycle         
+       6.940621858 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +130,24 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.554530e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.554616e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.554616e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.538785e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.538876e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.538876e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     3.398344 sec
+TOTAL       :     3.433615 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     9,911,477,696      cycles                           #    2.915 GHz                    
-    27,152,670,236      instructions                     #    2.74  insn per cycle         
-       3.402306403 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:96499) (avx2:    0) (512y:    0) (512z:    0)
+    10,052,781,401      cycles                           #    2.925 GHz                    
+    27,153,872,228      instructions                     #    2.70  insn per cycle         
+       3.438126502 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -174,20 +164,24 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.355915e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.356318e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.356318e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.384986e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.385397e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.385397e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.575190 sec
+TOTAL       :     1.561732 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     4,291,905,087      cycles                           #    2.719 GHz                    
-     9,591,555,035      instructions                     #    2.23  insn per cycle         
-       1.579167801 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84971) (512y:    0) (512z:    0)
+     4,257,385,748      cycles                           #    2.719 GHz                    
+     9,593,157,745      instructions                     #    2.25  insn per cycle         
+       1.566325188 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84961) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +198,24 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.851140e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.851660e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.851660e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.887075e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.887680e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.887680e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.373233 sec
+TOTAL       :     1.360664 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     3,746,937,682      cycles                           #    2.722 GHz                    
-     8,515,066,749      instructions                     #    2.27  insn per cycle         
-       1.377286559 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80619) (512y:   89) (512z:    0)
+     3,718,394,007      cycles                           #    2.725 GHz                    
+     8,517,746,108      instructions                     #    2.29  insn per cycle         
+       1.365273931 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80609) (512y:   90) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -234,20 +232,24 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.400870e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.401385e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.401385e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.422958e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.423581e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.423581e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.555325 sec
+TOTAL       :     1.545411 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,700,248,684      cycles                           #    1.732 GHz                    
-     4,281,718,976      instructions                     #    1.59  insn per cycle         
-       1.559584258 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2852) (512y:  103) (512z:79119)
+     2,703,115,511      cycles                           #    1.745 GHz                    
+     4,284,711,505      instructions                     #    1.59  insn per cycle         
+       1.550234745 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2856) (512y:  102) (512z:79114)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt
index d1172cfd54..6972883511 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 12m 16s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -13,38 +9,19 @@ HASCURAND=hasCurand
 HASHIPRAND=hasNoHiprand
 Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
 make: Nothing to be done for 'gtestlibs'.
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-DATE: 2024-08-29_22:58:43
+DATE: 2024-09-18_12:22:20
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +30,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.066484e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.066965e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.067125e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.054893e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.055305e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.055482e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     2.430851 sec
+TOTAL       :     2.442249 sec
 INFO: No Floating Point Exceptions have been reported
-     7,996,520,980      cycles                           #    2.904 GHz                    
-    17,582,907,369      instructions                     #    2.20  insn per cycle         
-       2.809657895 seconds time elapsed
+     8,106,561,725      cycles                           #    2.931 GHz                    
+    17,204,264,784      instructions                     #    2.12  insn per cycle         
+       2.825101828 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +48,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.223670e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.225797e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.226061e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.195814e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.197984e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.198227e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     4.006618 sec
+TOTAL       :     4.015071 sec
 INFO: No Floating Point Exceptions have been reported
-    12,585,748,075      cycles                           #    2.903 GHz                    
-    29,987,504,897      instructions                     #    2.38  insn per cycle         
-       4.390914139 seconds time elapsed
+    12,724,131,626      cycles                           #    2.928 GHz                    
+    29,969,146,046      instructions                     #    2.36  insn per cycle         
+       4.400441096 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +81,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.591203e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.591448e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.591448e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.111535e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.111769e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.111769e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     6.959272 sec
+TOTAL       :     6.512090 sec
 INFO: No Floating Point Exceptions have been reported
-    18,846,992,549      cycles                           #    2.708 GHz                    
-    53,934,698,792      instructions                     #    2.86  insn per cycle         
-       6.963312781 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:32023) (avx2:    0) (512y:    0) (512z:    0)
+    18,865,192,455      cycles                           #    2.896 GHz                    
+    53,932,477,912      instructions                     #    2.86  insn per cycle         
+       6.516216407 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:32022) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +113,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.545189e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.545273e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.545273e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.566187e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.566277e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.566277e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     3.418174 sec
+TOTAL       :     3.374379 sec
 INFO: No Floating Point Exceptions have been reported
-     9,927,542,575      cycles                           #    2.902 GHz                    
-    27,129,401,925      instructions                     #    2.73  insn per cycle         
-       3.422241980 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:96375) (avx2:    0) (512y:    0) (512z:    0)
+     9,914,343,626      cycles                           #    2.935 GHz                    
+    27,131,823,716      instructions                     #    2.74  insn per cycle         
+       3.378885579 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:96368) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +145,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.373922e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.374372e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.374372e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.354421e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.354826e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.354826e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.566638 sec
+TOTAL       :     1.575572 sec
 INFO: No Floating Point Exceptions have been reported
-     4,248,483,872      cycles                           #    2.706 GHz                    
-     9,585,288,003      instructions                     #    2.26  insn per cycle         
-       1.570770072 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84978) (512y:    0) (512z:    0)
+     4,301,534,798      cycles                           #    2.724 GHz                    
+     9,586,207,937      instructions                     #    2.23  insn per cycle         
+       1.579825913 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84968) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +177,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.856631e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.857145e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.857145e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.882229e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.882764e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.882764e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.370752 sec
+TOTAL       :     1.361976 sec
 INFO: No Floating Point Exceptions have been reported
-     3,739,427,730      cycles                           #    2.721 GHz                    
-     8,506,975,276      instructions                     #    2.27  insn per cycle         
-       1.374833370 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80642) (512y:  239) (512z:    0)
+     3,732,974,645      cycles                           #    2.734 GHz                    
+     8,507,919,232      instructions                     #    2.28  insn per cycle         
+       1.366219448 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80632) (512y:  240) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +209,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.397332e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.397841e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.397841e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.421560e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.422069e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.422069e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     1.557042 sec
+TOTAL       :     1.546646 sec
 INFO: No Floating Point Exceptions have been reported
-     2,701,434,125      cycles                           #    1.731 GHz                    
-     4,281,085,594      instructions                     #    1.58  insn per cycle         
-       1.561215984 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2689) (512y:  185) (512z:79103)
+     2,700,867,753      cycles                           #    1.742 GHz                    
+     4,281,876,861      instructions                     #    1.59  insn per cycle         
+       1.551074701 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2693) (512y:  184) (512z:79098)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
index 3869640735..41f4336bf3 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 13m 34s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -13,38 +9,19 @@ HASCURAND=hasCurand
 HASHIPRAND=hasNoHiprand
 Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
 make: Nothing to be done for 'gtestlibs'.
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-DATE: 2024-08-29_22:59:52
+DATE: 2024-09-18_12:23:48
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +30,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.292937e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.293697e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.294129e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.204897e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.205686e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.206019e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.186984e-05 +- 9.824899e-06 )  GeV^-6
-TOTAL       :     1.743289 sec
+TOTAL       :     1.744512 sec
 INFO: No Floating Point Exceptions have been reported
-     5,790,347,981      cycles                           #    2.896 GHz                    
-    11,721,375,787      instructions                     #    2.02  insn per cycle         
-       2.057769051 seconds time elapsed
+     5,890,882,031      cycles                           #    2.919 GHz                    
+    11,806,932,962      instructions                     #    2.00  insn per cycle         
+       2.074529782 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +48,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.132817e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.133477e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.133612e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.136881e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.137530e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.137618e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856829e-04 +- 8.333437e-05 )  GeV^-6
-TOTAL       :     2.042100 sec
+TOTAL       :     2.049597 sec
 INFO: No Floating Point Exceptions have been reported
-     6,657,158,515      cycles                           #    2.894 GHz                    
-    13,520,154,293      instructions                     #    2.03  insn per cycle         
-       2.357691181 seconds time elapsed
+     6,759,095,385      cycles                           #    2.923 GHz                    
+    14,845,205,038      instructions                     #    2.20  insn per cycle         
+       2.369358973 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +81,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.397410e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.397670e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.397670e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.543544e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.543805e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.543805e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825040e-06 )  GeV^-6
-TOTAL       :     6.289899 sec
+TOTAL       :     6.183077 sec
 INFO: No Floating Point Exceptions have been reported
-    18,272,622,117      cycles                           #    2.905 GHz                    
-    53,912,319,440      instructions                     #    2.95  insn per cycle         
-       6.293853770 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:20142) (avx2:    0) (512y:    0) (512z:    0)
+    18,161,151,116      cycles                           #    2.936 GHz                    
+    53,910,939,698      instructions                     #    2.97  insn per cycle         
+       6.187519652 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +113,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.335991e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.336407e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.336407e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.361492e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.361888e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.361888e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825037e-06 )  GeV^-6
-TOTAL       :     1.584050 sec
+TOTAL       :     1.573076 sec
 INFO: No Floating Point Exceptions have been reported
-     4,590,954,775      cycles                           #    2.892 GHz                    
-    13,807,119,292      instructions                     #    3.01  insn per cycle         
-       1.588052809 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:97022) (avx2:    0) (512y:    0) (512z:    0)
+     4,616,676,545      cycles                           #    2.928 GHz                    
+    13,807,548,367      instructions                     #    2.99  insn per cycle         
+       1.577368513 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +145,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.716316e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.717989e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.717989e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.784398e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.786227e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.786227e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.788124 sec
+TOTAL       :     0.780447 sec
 INFO: No Floating Point Exceptions have been reported
-     2,144,103,788      cycles                           #    2.709 GHz                    
-     4,836,269,392      instructions                     #    2.26  insn per cycle         
-       0.792211489 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:85497) (512y:    0) (512z:    0)
+     2,130,555,516      cycles                           #    2.717 GHz                    
+     4,837,275,089      instructions                     #    2.27  insn per cycle         
+       0.784743576 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:85494) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +177,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.724734e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.726965e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.726965e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.634242e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.636553e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.636553e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.685445 sec
+TOTAL       :     0.693797 sec
 INFO: No Floating Point Exceptions have been reported
-     1,870,591,030      cycles                           #    2.716 GHz                    
-     4,290,211,642      instructions                     #    2.29  insn per cycle         
-       0.689481731 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:81190) (512y:   44) (512z:    0)
+     1,903,490,036      cycles                           #    2.729 GHz                    
+     4,291,225,209      instructions                     #    2.25  insn per cycle         
+       0.698112096 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:81183) (512y:   45) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +209,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.811640e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.813711e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.813711e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.885404e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.887629e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.887629e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826771e-06 )  GeV^-6
-TOTAL       :     0.777937 sec
+TOTAL       :     0.769903 sec
 INFO: No Floating Point Exceptions have been reported
-     1,355,244,615      cycles                           #    1.734 GHz                    
-     2,161,868,001      instructions                     #    1.60  insn per cycle         
-       0.782071569 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3469) (512y:   47) (512z:79334)
+     1,354,371,935      cycles                           #    1.750 GHz                    
+     2,162,822,545      instructions                     #    1.60  insn per cycle         
+       0.774469858 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3481) (512y:   45) (512z:79330)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt
index 5830ab0747..8d8b09449b 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 04s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -13,38 +9,19 @@ HASCURAND=hasCurand
 HASHIPRAND=hasNoHiprand
 Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
 make: Nothing to be done for 'gtestlibs'.
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-DATE: 2024-08-29_23:32:19
+DATE: 2024-09-18_12:57:30
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,15 +34,16 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.301417e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.303072e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.303072e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.296128e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.300632e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.300632e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187093e-05 +- 9.825663e-06 )  GeV^-6
-TOTAL       :     1.680148 sec
+TOTAL       :     1.691925 sec
 INFO: No Floating Point Exceptions have been reported
-     5,629,091,684      cycles                           #    2.907 GHz                    
-    12,087,888,534      instructions                     #    2.15  insn per cycle         
-       1.992324000 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+     5,650,162,983      cycles                           #    2.892 GHz                    
+    11,596,549,862      instructions                     #    2.05  insn per cycle         
+       2.010258263 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -83,19 +61,23 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.092652e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.104101e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.104101e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.106225e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.117844e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.117844e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856440e-04 +- 8.331091e-05 )  GeV^-6
-TOTAL       :     2.059612 sec
+TOTAL       :     2.039868 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     6,706,217,109      cycles                           #    2.898 GHz                    
-    14,832,815,446      instructions                     #    2.21  insn per cycle         
-       2.372868228 seconds time elapsed
+     6,704,150,880      cycles                           #    2.913 GHz                    
+    14,933,981,007      instructions                     #    2.23  insn per cycle         
+       2.357689511 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -114,20 +96,24 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.505841e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.506106e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.506106e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.476123e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.476381e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.476381e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825040e-06 )  GeV^-6
-TOTAL       :     6.209814 sec
+TOTAL       :     6.231080 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-    18,140,361,227      cycles                           #    2.920 GHz                    
-    53,910,858,548      instructions                     #    2.97  insn per cycle         
-       6.213739946 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:20142) (avx2:    0) (512y:    0) (512z:    0)
+    18,168,605,946      cycles                           #    2.914 GHz                    
+    53,913,151,543      instructions                     #    2.97  insn per cycle         
+       6.235604617 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +130,24 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.372408e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.372872e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.372872e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.367327e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.367745e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.367745e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825037e-06 )  GeV^-6
-TOTAL       :     1.567410 sec
+TOTAL       :     1.570359 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     4,594,145,112      cycles                           #    2.927 GHz                    
-    13,808,050,244      instructions                     #    3.01  insn per cycle         
-       1.571423635 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:97022) (avx2:    0) (512y:    0) (512z:    0)
+     4,609,204,013      cycles                           #    2.928 GHz                    
+    13,810,618,137      instructions                     #    3.00  insn per cycle         
+       1.574904752 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -174,20 +164,24 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.731625e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.733291e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.733291e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.813057e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.814753e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.814753e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.786425 sec
+TOTAL       :     0.777334 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,145,796,069      cycles                           #    2.717 GHz                    
-     4,836,929,539      instructions                     #    2.25  insn per cycle         
-       0.790483054 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:85497) (512y:    0) (512z:    0)
+     2,130,492,369      cycles                           #    2.727 GHz                    
+     4,838,939,909      instructions                     #    2.27  insn per cycle         
+       0.781848874 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:85494) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +198,24 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.692135e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.694759e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.694759e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.672152e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.674283e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.674283e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.688369 sec
+TOTAL       :     0.690681 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     1,874,357,075      cycles                           #    2.709 GHz                    
-     4,291,087,124      instructions                     #    2.29  insn per cycle         
-       0.692357424 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:81190) (512y:   44) (512z:    0)
+     1,888,040,180      cycles                           #    2.718 GHz                    
+     4,293,435,273      instructions                     #    2.27  insn per cycle         
+       0.695178892 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:81183) (512y:   45) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -234,20 +232,24 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.794290e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.796316e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.796316e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.827455e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.829435e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.829435e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826771e-06 )  GeV^-6
-TOTAL       :     0.779484 sec
+TOTAL       :     0.776050 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     1,355,608,664      cycles                           #    1.732 GHz                    
-     2,162,825,766      instructions                     #    1.60  insn per cycle         
-       0.783659981 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3469) (512y:   47) (512z:79334)
+     1,356,992,115      cycles                           #    1.740 GHz                    
+     2,165,171,343      instructions                     #    1.60  insn per cycle         
+       0.780688696 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3481) (512y:   45) (512z:79330)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt
index 265acd58cc..43e4fd4779 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 11m 07s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -13,38 +9,19 @@ HASCURAND=hasCurand
 HASHIPRAND=hasNoHiprand
 Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
 make: Nothing to be done for 'gtestlibs'.
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-DATE: 2024-08-29_23:00:42
+DATE: 2024-09-18_12:24:51
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +30,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.278308e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.279020e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.279441e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.195253e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.195989e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.196280e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.186984e-05 +- 9.824899e-06 )  GeV^-6
-TOTAL       :     1.741721 sec
+TOTAL       :     1.748628 sec
 INFO: No Floating Point Exceptions have been reported
-     5,810,428,962      cycles                           #    2.906 GHz                    
-    11,824,497,448      instructions                     #    2.04  insn per cycle         
-       2.055558992 seconds time elapsed
+     5,866,556,695      cycles                           #    2.917 GHz                    
+    12,565,857,650      instructions                     #    2.14  insn per cycle         
+       2.067294353 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +48,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.122942e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.123570e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.123692e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.121566e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.122211e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.122327e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856829e-04 +- 8.333437e-05 )  GeV^-6
-TOTAL       :     2.062523 sec
+TOTAL       :     2.053021 sec
 INFO: No Floating Point Exceptions have been reported
-     6,728,286,198      cycles                           #    2.901 GHz                    
-    14,206,939,018      instructions                     #    2.11  insn per cycle         
-       2.377928524 seconds time elapsed
+     6,778,897,896      cycles                           #    2.924 GHz                    
+    14,985,250,436      instructions                     #    2.21  insn per cycle         
+       2.374125707 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +81,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.479074e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.479333e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.479333e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.587070e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.587332e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.587332e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825040e-06 )  GeV^-6
-TOTAL       :     6.228099 sec
+TOTAL       :     6.151381 sec
 INFO: No Floating Point Exceptions have been reported
-    18,108,781,113      cycles                           #    2.907 GHz                    
-    53,894,957,040      instructions                     #    2.98  insn per cycle         
-       6.232064949 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:20142) (avx2:    0) (512y:    0) (512z:    0)
+    18,055,403,744      cycles                           #    2.934 GHz                    
+    53,896,033,902      instructions                     #    2.99  insn per cycle         
+       6.155606485 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +113,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.364519e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.364927e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.364927e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.398632e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.399059e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.399059e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187013e-05 +- 9.825037e-06 )  GeV^-6
-TOTAL       :     1.571241 sec
+TOTAL       :     1.555535 sec
 INFO: No Floating Point Exceptions have been reported
-     4,583,763,877      cycles                           #    2.911 GHz                    
-    13,799,671,765      instructions                     #    3.01  insn per cycle         
-       1.575253406 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:96657) (avx2:    0) (512y:    0) (512z:    0)
+     4,569,755,461      cycles                           #    2.931 GHz                    
+    13,800,747,699      instructions                     #    3.02  insn per cycle         
+       1.559859354 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:96651) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +145,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.683095e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.684725e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.684725e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.803652e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.805665e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.805665e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.791842 sec
+TOTAL       :     0.778546 sec
 INFO: No Floating Point Exceptions have been reported
-     2,153,132,055      cycles                           #    2.708 GHz                    
-     4,839,924,419      instructions                     #    2.25  insn per cycle         
-       0.795824411 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:85887) (512y:    0) (512z:    0)
+     2,147,523,686      cycles                           #    2.745 GHz                    
+     4,840,927,245      instructions                     #    2.25  insn per cycle         
+       0.782889882 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:85884) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +177,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.585098e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.587181e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.587181e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.693768e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.696106e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.696106e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826767e-06 )  GeV^-6
-TOTAL       :     0.698079 sec
+TOTAL       :     0.688038 sec
 INFO: No Floating Point Exceptions have been reported
-     1,896,280,008      cycles                           #    2.704 GHz                    
-     4,294,088,324      instructions                     #    2.26  insn per cycle         
-       0.702136523 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:81730) (512y:   24) (512z:    0)
+     1,894,736,849      cycles                           #    2.739 GHz                    
+     4,295,025,191      instructions                     #    2.27  insn per cycle         
+       0.692237484 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:81725) (512y:   25) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +209,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.721862e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.723869e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.723869e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.859865e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.862153e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.862153e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187188e-05 +- 9.826771e-06 )  GeV^-6
-TOTAL       :     0.787800 sec
+TOTAL       :     0.772052 sec
 INFO: No Floating Point Exceptions have been reported
-     1,358,400,068      cycles                           #    1.717 GHz                    
-     2,168,635,540      instructions                     #    1.60  insn per cycle         
-       0.791988289 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4082) (512y:   32) (512z:79555)
+     1,359,092,301      cycles                           #    1.753 GHz                    
+     2,169,957,409      instructions                     #    1.60  insn per cycle         
+       0.776490041 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4092) (512y:   32) (512z:79551)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
index 51124f036b..e02407d644 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 14m 56s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -13,38 +9,19 @@ HASCURAND=hasCurand
 HASHIPRAND=hasNoHiprand
 Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
 make: Nothing to be done for 'gtestlibs'.
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-DATE: 2024-08-29_23:01:32
+DATE: 2024-09-18_12:25:53
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +30,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.688985e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.689689e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.689968e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.664550e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.665186e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.665405e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     2.177420 sec
+TOTAL       :     2.193141 sec
 INFO: No Floating Point Exceptions have been reported
-     7,251,729,774      cycles                           #    2.900 GHz                    
-    16,346,188,279      instructions                     #    2.25  insn per cycle         
-       2.556548527 seconds time elapsed
+     7,365,717,542      cycles                           #    2.923 GHz                    
+    16,291,118,073      instructions                     #    2.21  insn per cycle         
+       2.576836591 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +48,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.113584e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.113894e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.113942e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.102923e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.103231e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.103265e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     3.408716 sec
+TOTAL       :     3.419785 sec
 INFO: No Floating Point Exceptions have been reported
-    10,895,818,319      cycles                           #    2.917 GHz                    
-    25,195,509,626      instructions                     #    2.31  insn per cycle         
-       3.791268467 seconds time elapsed
+    10,963,927,138      cycles                           #    2.923 GHz                    
+    24,861,261,596      instructions                     #    2.27  insn per cycle         
+       3.806537159 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +81,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.498375e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.498600e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.498600e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.500673e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.500867e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.500867e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     7.041519 sec
+TOTAL       :     7.041188 sec
 INFO: No Floating Point Exceptions have been reported
-    19,169,796,431      cycles                           #    2.721 GHz                    
-    54,133,266,233      instructions                     #    2.82  insn per cycle         
-       7.045435972 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:32001) (avx2:    0) (512y:    0) (512z:    0)
+    19,221,485,171      cycles                           #    2.729 GHz                    
+    54,134,690,618      instructions                     #    2.82  insn per cycle         
+       7.045507456 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:32000) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +113,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.531964e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.532049e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.532049e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.537074e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.537163e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.537163e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     3.448576 sec
+TOTAL       :     3.437220 sec
 INFO: No Floating Point Exceptions have been reported
-     9,370,462,356      cycles                           #    2.715 GHz                    
-    26,186,953,076      instructions                     #    2.79  insn per cycle         
-       3.452660252 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:96048) (avx2:    0) (512y:    0) (512z:    0)
+     9,396,080,919      cycles                           #    2.731 GHz                    
+    26,188,082,836      instructions                     #    2.79  insn per cycle         
+       3.441517756 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:96049) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +145,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.485181e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.485611e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.485611e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.541134e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.541635e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.541635e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.516415 sec
+TOTAL       :     1.493238 sec
 INFO: No Floating Point Exceptions have been reported
-     4,087,096,426      cycles                           #    2.689 GHz                    
-     9,248,408,142      instructions                     #    2.26  insn per cycle         
-       1.520508640 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84378) (512y:    0) (512z:    0)
+     4,077,957,635      cycles                           #    2.724 GHz                    
+     9,249,641,886      instructions                     #    2.27  insn per cycle         
+       1.497708781 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:84390) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +177,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.041285e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.041878e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.041878e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.136665e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.137271e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.137271e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.309906 sec
+TOTAL       :     1.279311 sec
 INFO: No Floating Point Exceptions have been reported
-     3,507,609,505      cycles                           #    2.671 GHz                    
-     8,182,225,015      instructions                     #    2.33  insn per cycle         
-       1.313975216 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80003) (512y:   79) (512z:    0)
+     3,517,339,720      cycles                           #    2.742 GHz                    
+     8,183,228,052      instructions                     #    2.33  insn per cycle         
+       1.283633317 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:80015) (512y:   80) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +209,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.418742e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.419249e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.419249e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.501058e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.501647e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.501647e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.546152 sec
+TOTAL       :     1.511075 sec
 INFO: No Floating Point Exceptions have been reported
-     2,666,659,514      cycles                           #    1.721 GHz                    
-     4,172,128,592      instructions                     #    1.56  insn per cycle         
-       1.550318622 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2589) (512y:   93) (512z:78909)
+     2,666,286,599      cycles                           #    1.760 GHz                    
+     4,173,044,119      instructions                     #    1.57  insn per cycle         
+       1.515586960 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2615) (512y:   92) (512z:78910)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt
index ff1435b3e2..59afbf5683 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 12m 07s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -13,38 +9,19 @@ HASCURAND=hasCurand
 HASHIPRAND=hasNoHiprand
 Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1)
 make: Nothing to be done for 'gtestlibs'.
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-
-make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
-make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg'
+make: Nothing to be done for 'all'.
 
-DATE: 2024-08-29_23:02:41
+DATE: 2024-09-18_12:27:20
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +30,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.674691e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.675223e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.675466e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.673618e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.674137e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.674360e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     2.180460 sec
+TOTAL       :     2.187725 sec
 INFO: No Floating Point Exceptions have been reported
-     7,285,576,606      cycles                           #    2.911 GHz                    
-    16,299,901,303      instructions                     #    2.24  insn per cycle         
-       2.558450596 seconds time elapsed
+     7,320,649,548      cycles                           #    2.912 GHz                    
+    16,262,382,237      instructions                     #    2.22  insn per cycle         
+       2.571114049 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +48,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.109231e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.109533e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.109571e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.105826e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.106139e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.106173e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 1.856249e-04 +- 8.329951e-05 )  GeV^-6
-TOTAL       :     3.406546 sec
+TOTAL       :     3.426824 sec
 INFO: No Floating Point Exceptions have been reported
-    10,890,129,022      cycles                           #    2.909 GHz                    
-    23,762,395,814      instructions                     #    2.18  insn per cycle         
-       3.800131530 seconds time elapsed
+    11,001,686,020      cycles                           #    2.930 GHz                    
+    25,147,468,300      instructions                     #    2.29  insn per cycle         
+       3.812692076 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +81,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.038908e+01                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.039135e+01                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.039135e+01                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.043178e+01                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.043403e+01                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.043403e+01                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825549e-06 )  GeV^-6
-TOTAL       :     6.569694 sec
+TOTAL       :     6.566514 sec
 INFO: No Floating Point Exceptions have been reported
-    19,149,215,101      cycles                           #    2.913 GHz                    
-    54,156,003,574      instructions                     #    2.83  insn per cycle         
-       6.574054312 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:32203) (avx2:    0) (512y:    0) (512z:    0)
+    19,176,347,779      cycles                           #    2.919 GHz                    
+    54,156,968,111      instructions                     #    2.82  insn per cycle         
+       6.570813145 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:32202) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +113,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.519464e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.519556e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.519556e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.555217e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.555303e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.555303e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     3.477621 sec
+TOTAL       :     3.398105 sec
 INFO: No Floating Point Exceptions have been reported
-     9,441,874,473      cycles                           #    2.713 GHz                    
-    26,086,760,463      instructions                     #    2.76  insn per cycle         
-       3.481656660 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:95937) (avx2:    0) (512y:    0) (512z:    0)
+     9,273,027,189      cycles                           #    2.726 GHz                    
+    26,087,136,722      instructions                     #    2.81  insn per cycle         
+       3.402445291 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:95935) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +145,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.547459e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.547914e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.547914e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.537227e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.537679e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.537679e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.490795 sec
+TOTAL       :     1.494127 sec
 INFO: No Floating Point Exceptions have been reported
-     4,054,842,242      cycles                           #    2.714 GHz                    
-     9,212,719,836      instructions                     #    2.27  insn per cycle         
-       1.494856862 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:83852) (512y:    0) (512z:    0)
+     4,071,118,335      cycles                           #    2.719 GHz                    
+     9,214,803,224      instructions                     #    2.26  insn per cycle         
+       1.498443184 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:83864) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +177,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.103065e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.103686e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.103686e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.138433e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.139090e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.139090e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.288731 sec
+TOTAL       :     1.277732 sec
 INFO: No Floating Point Exceptions have been reported
-     3,507,522,727      cycles                           #    2.715 GHz                    
-     8,167,227,176      instructions                     #    2.33  insn per cycle         
-       1.292784178 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:79409) (512y:  229) (512z:    0)
+     3,507,535,748      cycles                           #    2.738 GHz                    
+     8,168,319,774      instructions                     #    2.33  insn per cycle         
+       1.282049677 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:79421) (512y:  230) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +209,23 @@ Process                     = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.475640e+02                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.476182e+02                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.476182e+02                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.543576e+02                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.544114e+02                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.544114e+02                 )  sec^-1
 MeanMatrixElemValue         = ( 1.187066e-05 +- 9.825548e-06 )  GeV^-6
-TOTAL       :     1.520957 sec
+TOTAL       :     1.493048 sec
 INFO: No Floating Point Exceptions have been reported
-     2,623,180,222      cycles                           #    1.721 GHz                    
-     4,166,715,132      instructions                     #    1.59  insn per cycle         
-       1.525139188 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1853) (512y:  175) (512z:78883)
+     2,621,670,941      cycles                           #    1.752 GHz                    
+     4,167,760,475      instructions                     #    1.59  insn per cycle         
+       1.497511330 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1879) (512y:  174) (512z:78884)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
index 8f58b467db..b0413f07b6 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 07s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-29_22:56:15
+DATE: 2024-09-18_12:19:33
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.726362e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.262701e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.610286e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.879954e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.891707e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.001488e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.451987 sec
+TOTAL       :     0.459044 sec
 INFO: No Floating Point Exceptions have been reported
-     1,896,871,752      cycles                           #    2.848 GHz                    
-     2,681,389,264      instructions                     #    1.41  insn per cycle         
-       0.724279481 seconds time elapsed
+     1,939,663,698      cycles                           #    2.864 GHz                    
+     2,747,739,655      instructions                     #    1.42  insn per cycle         
+       0.734387710 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.348461e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.144104e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.549018e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.061438e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.512391e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.741979e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
-TOTAL       :     0.533005 sec
+TOTAL       :     0.541789 sec
 INFO: No Floating Point Exceptions have been reported
-     2,189,534,054      cycles                           #    2.846 GHz                    
-     3,087,782,694      instructions                     #    1.41  insn per cycle         
-       0.826717021 seconds time elapsed
+     2,258,330,350      cycles                           #    2.885 GHz                    
+     3,233,524,764      instructions                     #    1.43  insn per cycle         
+       0.842114758 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.051761e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.074255e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.074255e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.056900e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.079897e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.079897e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     1.577567 sec
+TOTAL       :     1.570278 sec
 INFO: No Floating Point Exceptions have been reported
-     4,620,045,988      cycles                           #    2.922 GHz                    
-    13,190,317,664      instructions                     #    2.86  insn per cycle         
-       1.581771455 seconds time elapsed
+     4,626,289,546      cycles                           #    2.939 GHz                    
+    13,191,201,959      instructions                     #    2.85  insn per cycle         
+       1.574568894 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  707) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.844349e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.914518e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.914518e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.877819e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.949205e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.949205e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.907812 sec
+TOTAL       :     0.891875 sec
 INFO: No Floating Point Exceptions have been reported
-     2,648,120,227      cycles                           #    2.907 GHz                    
-     7,555,054,987      instructions                     #    2.85  insn per cycle         
-       0.912056964 seconds time elapsed
+     2,638,327,743      cycles                           #    2.947 GHz                    
+     7,555,209,951      instructions                     #    2.86  insn per cycle         
+       0.896114078 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.046694e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.243521e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.243521e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.170773e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.377039e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.377039e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.557677 sec
+TOTAL       :     0.536262 sec
 INFO: No Floating Point Exceptions have been reported
-     1,493,865,953      cycles                           #    2.662 GHz                    
-     3,159,494,458      instructions                     #    2.11  insn per cycle         
-       0.561947750 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2984) (512y:    0) (512z:    0)
+     1,489,383,659      cycles                           #    2.759 GHz                    
+     3,159,296,473      instructions                     #    2.12  insn per cycle         
+       0.540558254 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2991) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.460167e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.708483e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.708483e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.529419e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.784986e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.784986e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.493228 sec
+TOTAL       :     0.483576 sec
 INFO: No Floating Point Exceptions have been reported
-     1,352,040,215      cycles                           #    2.721 GHz                    
-     3,014,341,429      instructions                     #    2.23  insn per cycle         
-       0.497512207 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2745) (512y:  104) (512z:    0)
+     1,345,705,641      cycles                           #    2.762 GHz                    
+     3,013,816,668      instructions                     #    2.24  insn per cycle         
+       0.487835073 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2749) (512y:  104) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.284889e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.393571e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.393571e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.357874e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.470306e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.470306e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.739242 sec
+TOTAL       :     0.716743 sec
 INFO: No Floating Point Exceptions have been reported
-     1,329,435,566      cycles                           #    1.790 GHz                    
-     1,962,380,777      instructions                     #    1.48  insn per cycle         
-       0.743503104 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1367) (512y:  106) (512z: 2217)
+     1,329,087,485      cycles                           #    1.845 GHz                    
+     1,962,911,490      instructions                     #    1.48  insn per cycle         
+       0.721045759 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1379) (512y:  106) (512z: 2218)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt
index 26b4e39e87..e338aa0c83 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 16s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-29_23:29:44
+DATE: 2024-09-18_12:54:34
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,15 +53,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.479417e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.184083e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.184083e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.300988e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.591479e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.591479e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.479274 sec
+TOTAL       :     0.483517 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     1,978,004,369      cycles                           #    2.865 GHz                    
-     2,955,147,787      instructions                     #    1.49  insn per cycle         
-       0.748924266 seconds time elapsed
+     2,018,637,546      cycles                           #    2.889 GHz                    
+     3,002,221,313      instructions                     #    1.49  insn per cycle         
+       0.755433693 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
@@ -83,19 +80,23 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.246218e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.372237e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.372237e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.209513e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.250583e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.250583e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
-TOTAL       :     0.755791 sec
+TOTAL       :     0.757854 sec
 INFO: No Floating Point Exceptions have been reported
-     2,914,728,097      cycles                           #    2.881 GHz                    
-     4,500,187,574      instructions                     #    1.54  insn per cycle         
-       1.069998281 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+     2,924,491,267      cycles                           #    2.893 GHz                    
+     4,472,331,439      instructions                     #    1.53  insn per cycle         
+       1.067931667 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -114,20 +115,24 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.050932e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.074087e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.074087e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.054217e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.077389e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.077389e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     1.586469 sec
+TOTAL       :     1.581825 sec
 INFO: No Floating Point Exceptions have been reported
-     4,664,946,821      cycles                           #    2.934 GHz                    
-    13,198,355,646      instructions                     #    2.83  insn per cycle         
-       1.590859708 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+     4,664,515,506      cycles                           #    2.942 GHz                    
+    13,198,020,525      instructions                     #    2.83  insn per cycle         
+       1.586342613 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  707) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,24 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.808190e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.878202e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.878202e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.861182e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.934526e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.934526e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.932993 sec
+TOTAL       :     0.908066 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     2,689,645,029      cycles                           #    2.872 GHz                    
-     7,602,533,127      instructions                     #    2.83  insn per cycle         
-       0.937357913 seconds time elapsed
+     2,683,422,373      cycles                           #    2.942 GHz                    
+     7,604,693,273      instructions                     #    2.83  insn per cycle         
+       0.912668086 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -174,20 +183,24 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.098794e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.305964e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.305964e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.136463e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.344918e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.344918e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.556740 sec
+TOTAL       :     0.550693 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     1,538,126,729      cycles                           #    2.743 GHz                    
-     3,210,272,712      instructions                     #    2.09  insn per cycle         
-       0.561227268 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2984) (512y:    0) (512z:    0)
+     1,532,887,808      cycles                           #    2.763 GHz                    
+     3,210,306,872      instructions                     #    2.09  insn per cycle         
+       0.555384102 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2991) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +217,24 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.454466e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.709491e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.709491e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.483226e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.741231e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.741231e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.502503 sec
+TOTAL       :     0.498787 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     1,390,826,167      cycles                           #    2.746 GHz                    
-     3,065,263,449      instructions                     #    2.20  insn per cycle         
-       0.507090204 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2745) (512y:  104) (512z:    0)
+     1,390,412,454      cycles                           #    2.766 GHz                    
+     3,064,189,434      instructions                     #    2.20  insn per cycle         
+       0.503409402 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2749) (512y:  104) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -234,20 +251,24 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.304175e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.415855e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.415855e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.324425e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.438930e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.438930e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.741021 sec
+TOTAL       :     0.734309 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     1,370,544,184      cycles                           #    1.840 GHz                    
-     2,002,405,410      instructions                     #    1.46  insn per cycle         
-       0.745593381 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1367) (512y:  106) (512z: 2217)
+     1,369,927,300      cycles                           #    1.856 GHz                    
+     2,000,629,444      instructions                     #    1.46  insn per cycle         
+       0.738870915 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1379) (512y:  106) (512z: 2218)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt
index 2a1fefe99c..698af75849 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 48s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-29_22:56:28
+DATE: 2024-09-18_12:19:47
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.712805e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.193466e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.523668e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.870113e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.853917e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.966787e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.449950 sec
+TOTAL       :     0.459366 sec
 INFO: No Floating Point Exceptions have been reported
-     1,900,965,307      cycles                           #    2.859 GHz                    
-     2,713,634,896      instructions                     #    1.43  insn per cycle         
-       0.722081118 seconds time elapsed
+     1,939,416,729      cycles                           #    2.875 GHz                    
+     2,719,660,225      instructions                     #    1.40  insn per cycle         
+       0.733418344 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.327514e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.030393e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.419287e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.000378e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.373625e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.579737e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
-TOTAL       :     0.533691 sec
+TOTAL       :     0.544100 sec
 INFO: No Floating Point Exceptions have been reported
-     2,202,998,596      cycles                           #    2.860 GHz                    
-     3,163,743,502      instructions                     #    1.44  insn per cycle         
-       0.827476712 seconds time elapsed
+     2,268,193,870      cycles                           #    2.866 GHz                    
+     3,228,698,159      instructions                     #    1.42  insn per cycle         
+       0.849845463 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.053904e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.076658e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.076658e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.061980e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.085030e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.085030e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     1.574324 sec
+TOTAL       :     1.562513 sec
 INFO: No Floating Point Exceptions have been reported
-     4,618,630,520      cycles                           #    2.927 GHz                    
-    13,179,133,059      instructions                     #    2.85  insn per cycle         
-       1.578415029 seconds time elapsed
+     4,622,072,256      cycles                           #    2.951 GHz                    
+    13,179,636,938      instructions                     #    2.85  insn per cycle         
+       1.566824554 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  692) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.827604e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.897483e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.897483e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.876350e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.948368e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.948368e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.915792 sec
+TOTAL       :     0.892202 sec
 INFO: No Floating Point Exceptions have been reported
-     2,649,406,486      cycles                           #    2.883 GHz                    
-     7,553,573,033      instructions                     #    2.85  insn per cycle         
-       0.920139852 seconds time elapsed
+     2,639,628,239      cycles                           #    2.947 GHz                    
+     7,552,826,806      instructions                     #    2.86  insn per cycle         
+       0.896585147 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.150256e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.355278e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.355278e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.183448e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.393646e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.393646e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.539604 sec
+TOTAL       :     0.534141 sec
 INFO: No Floating Point Exceptions have been reported
-     1,491,577,470      cycles                           #    2.746 GHz                    
-     3,158,610,313      instructions                     #    2.12  insn per cycle         
-       0.543883786 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2969) (512y:    0) (512z:    0)
+     1,491,163,611      cycles                           #    2.773 GHz                    
+     3,158,625,928      instructions                     #    2.12  insn per cycle         
+       0.538404277 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2976) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.497640e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.745857e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.745857e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.492000e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.744364e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.744364e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.487658 sec
+TOTAL       :     0.489086 sec
 INFO: No Floating Point Exceptions have been reported
-     1,345,211,794      cycles                           #    2.738 GHz                    
-     3,010,561,614      instructions                     #    2.24  insn per cycle         
-       0.491852014 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2719) (512y:  104) (512z:    0)
+     1,346,762,343      cycles                           #    2.733 GHz                    
+     3,011,186,186      instructions                     #    2.24  insn per cycle         
+       0.493386881 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2726) (512y:  104) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.325885e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.437267e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.437267e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.331076e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.442354e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.442354e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.726256 sec
+TOTAL       :     0.724419 sec
 INFO: No Floating Point Exceptions have been reported
-     1,325,529,479      cycles                           #    1.816 GHz                    
-     1,960,541,933      instructions                     #    1.48  insn per cycle         
-       0.730627916 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1344) (512y:  106) (512z: 2217)
+     1,327,007,586      cycles                           #    1.823 GHz                    
+     1,960,723,409      instructions                     #    1.48  insn per cycle         
+       0.728619129 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1356) (512y:  106) (512z: 2218)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
index f0eed2f4be..8a6bb74f5e 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 56s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-29_22:56:42
+DATE: 2024-09-18_12:20:00
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,17 +49,17 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.119421e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.036441e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.128814e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.830452e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.999598e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.147092e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018174e+01 +- 1.429492e+01 )  GeV^-2
-TOTAL       :     0.447469 sec
+TOTAL       :     0.454704 sec
 INFO: No Floating Point Exceptions have been reported
-     1,918,025,315      cycles                           #    2.861 GHz                    
-     2,686,229,018      instructions                     #    1.40  insn per cycle         
-       0.729916634 seconds time elapsed
+     1,916,653,758      cycles                           #    2.859 GHz                    
+     2,706,744,679      instructions                     #    1.41  insn per cycle         
+       0.728210028 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 165
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 169
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 .........................................................................
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP=
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.929087e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.527322e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.625267e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.474236e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.587297e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.949656e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.571360e+02 +- 2.114020e+02 )  GeV^-2
-TOTAL       :     0.484458 sec
+TOTAL       :     0.490292 sec
 INFO: No Floating Point Exceptions have been reported
-     2,025,660,077      cycles                           #    2.857 GHz                    
-     2,912,943,658      instructions                     #    1.44  insn per cycle         
-       0.767688988 seconds time elapsed
+     2,078,449,093      cycles                           #    2.886 GHz                    
+     2,974,210,572      instructions                     #    1.43  insn per cycle         
+       0.777065481 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.094328e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.119456e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.119456e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.100218e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.125308e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.125308e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     1.515568 sec
+TOTAL       :     1.507385 sec
 INFO: No Floating Point Exceptions have been reported
-     4,405,562,836      cycles                           #    2.900 GHz                    
-    12,951,563,992      instructions                     #    2.94  insn per cycle         
-       1.519704133 seconds time elapsed
+     4,410,101,975      cycles                           #    2.919 GHz                    
+    12,953,085,822      instructions                     #    2.94  insn per cycle         
+       1.511568329 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  645) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.864259e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.042813e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.042813e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.885848e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.067058e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.067058e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     0.589275 sec
+TOTAL       :     0.584913 sec
 INFO: No Floating Point Exceptions have been reported
-     1,726,786,335      cycles                           #    2.913 GHz                    
-     4,541,721,153      instructions                     #    2.63  insn per cycle         
-       0.593457832 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3626) (avx2:    0) (512y:    0) (512z:    0)
+     1,727,797,245      cycles                           #    2.936 GHz                    
+     4,541,987,860      instructions                     #    2.63  insn per cycle         
+       0.589023498 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.646542e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.345345e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.345345e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.703055e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.396540e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.396540e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.307893 sec
+TOTAL       :     0.305122 sec
 INFO: No Floating Point Exceptions have been reported
-       854,858,999      cycles                           #    2.745 GHz                    
-     1,917,346,030      instructions                     #    2.24  insn per cycle         
-       0.312048006 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3566) (512y:    0) (512z:    0)
+       856,571,449      cycles                           #    2.776 GHz                    
+     1,917,826,981      instructions                     #    2.24  insn per cycle         
+       0.309207440 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3580) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.020064e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.829176e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.829176e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.972249e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.763699e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.763699e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.289972 sec
+TOTAL       :     0.292353 sec
 INFO: No Floating Point Exceptions have been reported
-       804,375,281      cycles                           #    2.741 GHz                    
-     1,834,366,299      instructions                     #    2.28  insn per cycle         
-       0.294106928 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3390) (512y:   22) (512z:    0)
+       806,013,891      cycles                           #    2.724 GHz                    
+     1,834,284,908      instructions                     #    2.28  insn per cycle         
+       0.296525539 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3400) (512y:   22) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.491631e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.939083e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.939083e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.507099e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.952644e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.952644e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018829e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.384694 sec
+TOTAL       :     0.383525 sec
 INFO: No Floating Point Exceptions have been reported
-       728,307,512      cycles                           #    1.876 GHz                    
-     1,308,382,279      instructions                     #    1.80  insn per cycle         
-       0.388823134 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1942) (512y:   26) (512z: 2432)
+       728,616,899      cycles                           #    1.883 GHz                    
+     1,308,760,783      instructions                     #    1.80  insn per cycle         
+       0.387733440 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1964) (512y:   24) (512z: 2435)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt
index 5605f32a61..a6b985fae9 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 03s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-29_23:29:58
+DATE: 2024-09-18_12:54:48
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -57,21 +53,22 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.460428e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.981655e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.981655e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.927791e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.333519e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.333519e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.017654e+01 +- 1.429183e+01 )  GeV^-2
-TOTAL       :     0.458045 sec
+TOTAL       :     0.469351 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     1,912,527,906      cycles                           #    2.861 GHz                    
-     2,832,677,934      instructions                     #    1.48  insn per cycle         
-       0.725530696 seconds time elapsed
+     1,981,672,619      cycles                           #    2.860 GHz                    
+     2,838,745,918      instructions                     #    1.43  insn per cycle         
+       0.751805905 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
 WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
 WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
 WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384)
 WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384)
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 165
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 169
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 .........................................................................
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP=
@@ -83,19 +80,23 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.031344e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.571084e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.571084e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.989037e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.963677e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.963677e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.609941e+02 +- 2.115589e+02 )  GeV^-2
-TOTAL       :     0.641626 sec
+TOTAL       :     0.635748 sec
 INFO: No Floating Point Exceptions have been reported
-     2,455,324,664      cycles                           #    2.853 GHz                    
-     3,798,610,028      instructions                     #    1.55  insn per cycle         
-       0.929289203 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+     2,515,384,416      cycles                           #    2.885 GHz                    
+     3,805,896,684      instructions                     #    1.51  insn per cycle         
+       0.928757267 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -114,20 +115,24 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.100464e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.126356e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.126356e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.110811e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.136583e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.136583e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     1.510482 sec
+TOTAL       :     1.496763 sec
 INFO: No Floating Point Exceptions have been reported
-     4,428,031,735      cycles                           #    2.925 GHz                    
-    12,956,670,249      instructions                     #    2.93  insn per cycle         
-       1.514783541 seconds time elapsed
+INFO: No Floating Point Exceptions have been reported
+     4,424,063,947      cycles                           #    2.949 GHz                    
+    12,956,460,167      instructions                     #    2.93  insn per cycle         
+       1.501035221 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  645) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,24 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.852778e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.034158e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.034158e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.857646e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.036818e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.036818e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     0.596524 sec
+TOTAL       :     0.595633 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-     1,751,480,771      cycles                           #    2.918 GHz                    
-     4,589,869,030      instructions                     #    2.62  insn per cycle         
-       0.600870196 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3626) (avx2:    0) (512y:    0) (512z:    0)
+     1,753,185,847      cycles                           #    2.926 GHz                    
+     4,590,460,046      instructions                     #    2.62  insn per cycle         
+       0.599868062 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -174,20 +183,24 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.591986e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.306015e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.306015e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.498095e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.167392e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.167392e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.314944 sec
+TOTAL       :     0.320525 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-       876,885,384      cycles                           #    2.752 GHz                    
-     1,954,408,231      instructions                     #    2.23  insn per cycle         
-       0.319132174 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3566) (512y:    0) (512z:    0)
+       879,877,577      cycles                           #    2.713 GHz                    
+     1,955,191,246      instructions                     #    2.22  insn per cycle         
+       0.324936571 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3580) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -204,20 +217,24 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.968412e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.771731e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.771731e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.017893e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.823832e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.823832e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.296726 sec
+TOTAL       :     0.294540 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-       822,991,830      cycles                           #    2.740 GHz                    
-     1,870,983,532      instructions                     #    2.27  insn per cycle         
-       0.300923021 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3390) (512y:   22) (512z:    0)
+       824,659,177      cycles                           #    2.764 GHz                    
+     1,871,065,231      instructions                     #    2.27  insn per cycle         
+       0.298923642 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3400) (512y:   22) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -234,20 +251,24 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.457418e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.897878e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.897878e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.488254e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.923976e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.923976e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018829e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.391935 sec
+TOTAL       :     0.389655 sec
+INFO: No Floating Point Exceptions have been reported
 INFO: No Floating Point Exceptions have been reported
-       751,588,906      cycles                           #    1.900 GHz                    
-     1,349,727,321      instructions                     #    1.80  insn per cycle         
-       0.396323799 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1942) (512y:   26) (512z: 2432)
+       750,952,234      cycles                           #    1.909 GHz                    
+     1,350,104,124      instructions                     #    1.80  insn per cycle         
+       0.394048329 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1964) (512y:   24) (512z: 2435)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt
index d040177026..67763acaac 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 48s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-29_22:56:54
+DATE: 2024-09-18_12:20:13
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,17 +49,17 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 5.127637e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.053180e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.153862e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.836197e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.010594e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.149037e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018174e+01 +- 1.429492e+01 )  GeV^-2
-TOTAL       :     0.446303 sec
+TOTAL       :     0.455895 sec
 INFO: No Floating Point Exceptions have been reported
-     1,892,017,092      cycles                           #    2.852 GHz                    
-     2,646,562,805      instructions                     #    1.40  insn per cycle         
-       0.720494676 seconds time elapsed
+     1,937,517,385      cycles                           #    2.882 GHz                    
+     2,695,733,072      instructions                     #    1.39  insn per cycle         
+       0.731352438 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 164
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 169
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 .........................................................................
 runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP=
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.004070e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.568435e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.672294e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.416288e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.368760e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.717095e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.571360e+02 +- 2.114020e+02 )  GeV^-2
-TOTAL       :     0.485987 sec
+TOTAL       :     0.494521 sec
 INFO: No Floating Point Exceptions have been reported
-     2,027,231,831      cycles                           #    2.852 GHz                    
-     2,888,437,663      instructions                     #    1.42  insn per cycle         
-       0.769793031 seconds time elapsed
+     2,101,577,521      cycles                           #    2.872 GHz                    
+     2,967,805,317      instructions                     #    1.41  insn per cycle         
+       0.790688389 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.096156e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.120921e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.120921e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.109320e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.134422e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.134422e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     1.512692 sec
+TOTAL       :     1.495018 sec
 INFO: No Floating Point Exceptions have been reported
-     4,411,886,190      cycles                           #    2.910 GHz                    
-    12,928,184,190      instructions                     #    2.93  insn per cycle         
-       1.516753339 seconds time elapsed
+     4,406,318,830      cycles                           #    2.941 GHz                    
+    12,927,562,871      instructions                     #    2.93  insn per cycle         
+       1.499121241 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  630) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.878836e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.060923e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.060923e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.896108e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.081157e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.081157e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018564e+01 +- 1.429903e+01 )  GeV^-2
-TOTAL       :     0.585977 sec
+TOTAL       :     0.582933 sec
 INFO: No Floating Point Exceptions have been reported
-     1,726,387,164      cycles                           #    2.928 GHz                    
-     4,536,203,734      instructions                     #    2.63  insn per cycle         
-       0.590186867 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3610) (avx2:    0) (512y:    0) (512z:    0)
+     1,729,684,566      cycles                           #    2.949 GHz                    
+     4,536,959,704      instructions                     #    2.62  insn per cycle         
+       0.587227353 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3611) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.660034e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.358223e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.358223e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.671417e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.388788e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.388788e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.307186 sec
+TOTAL       :     0.306400 sec
 INFO: No Floating Point Exceptions have been reported
-       856,403,810      cycles                           #    2.756 GHz                    
-     1,914,283,667      instructions                     #    2.24  insn per cycle         
-       0.311393558 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3536) (512y:    0) (512z:    0)
+       861,419,707      cycles                           #    2.779 GHz                    
+     1,914,521,871      instructions                     #    2.22  insn per cycle         
+       0.310539350 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3549) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.036262e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.837462e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.837462e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.063623e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.871376e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.871376e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018828e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.289093 sec
+TOTAL       :     0.288149 sec
 INFO: No Floating Point Exceptions have been reported
-       803,624,213      cycles                           #    2.746 GHz                    
-     1,830,203,476      instructions                     #    2.28  insn per cycle         
-       0.293218472 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3354) (512y:   22) (512z:    0)
+       805,096,427      cycles                           #    2.760 GHz                    
+     1,830,123,182      instructions                     #    2.27  insn per cycle         
+       0.292238886 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3364) (512y:   22) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.513089e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.950488e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.950488e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.516684e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.964575e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.964575e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018829e+01 +- 1.429922e+01 )  GeV^-2
-TOTAL       :     0.383052 sec
+TOTAL       :     0.382571 sec
 INFO: No Floating Point Exceptions have been reported
-       728,226,687      cycles                           #    1.884 GHz                    
-     1,306,112,916      instructions                     #    1.79  insn per cycle         
-       0.387247657 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1905) (512y:   26) (512z: 2435)
+       732,988,918      cycles                           #    1.898 GHz                    
+     1,306,469,020      instructions                     #    1.78  insn per cycle         
+       0.386957442 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1928) (512y:   24) (512z: 2435)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
index 7e1b4e6534..91e0f5565c 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 56s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-29_22:57:06
+DATE: 2024-09-18_12:20:25
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.749807e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.308121e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.661112e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.873875e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.862522e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.986643e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.450446 sec
+TOTAL       :     0.460578 sec
 INFO: No Floating Point Exceptions have been reported
-     1,888,302,564      cycles                           #    2.834 GHz                    
-     2,667,402,552      instructions                     #    1.41  insn per cycle         
-       0.723703520 seconds time elapsed
+     1,947,256,638      cycles                           #    2.867 GHz                    
+     2,707,543,109      instructions                     #    1.39  insn per cycle         
+       0.736559227 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.346796e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.137623e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.549952e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.023480e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.410580e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.620297e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
-TOTAL       :     0.534520 sec
+TOTAL       :     0.541253 sec
 INFO: No Floating Point Exceptions have been reported
-     2,203,657,514      cycles                           #    2.855 GHz                    
-     3,143,925,936      instructions                     #    1.43  insn per cycle         
-       0.829949035 seconds time elapsed
+     2,242,175,675      cycles                           #    2.877 GHz                    
+     3,202,804,008      instructions                     #    1.43  insn per cycle         
+       0.837177537 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.051238e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.073958e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.073958e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.056357e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.078870e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.078870e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     1.578696 sec
+TOTAL       :     1.570709 sec
 INFO: No Floating Point Exceptions have been reported
-     4,641,612,180      cycles                           #    2.934 GHz                    
-    13,177,949,346      instructions                     #    2.84  insn per cycle         
-       1.582867704 seconds time elapsed
+     4,639,217,468      cycles                           #    2.947 GHz                    
+    13,177,906,216      instructions                     #    2.84  insn per cycle         
+       1.574828509 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  681) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.853038e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.923408e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.923408e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.872603e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.943230e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.943230e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.903441 sec
+TOTAL       :     0.894053 sec
 INFO: No Floating Point Exceptions have been reported
-     2,654,124,047      cycles                           #    2.927 GHz                    
-     7,474,562,151      instructions                     #    2.82  insn per cycle         
-       0.907731665 seconds time elapsed
+     2,648,821,910      cycles                           #    2.951 GHz                    
+     7,473,297,472      instructions                     #    2.82  insn per cycle         
+       0.898331919 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3152) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.181787e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.395557e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.395557e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.194377e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.403567e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.403567e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.534479 sec
+TOTAL       :     0.532589 sec
 INFO: No Floating Point Exceptions have been reported
-     1,477,170,124      cycles                           #    2.745 GHz                    
-     3,127,314,564      instructions                     #    2.12  insn per cycle         
-       0.538817015 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3119) (512y:    0) (512z:    0)
+     1,476,927,402      cycles                           #    2.754 GHz                    
+     3,127,083,010      instructions                     #    2.12  insn per cycle         
+       0.536841632 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3133) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.518635e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.785177e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.785177e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.590247e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.853965e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.853965e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.485514 sec
+TOTAL       :     0.476058 sec
 INFO: No Floating Point Exceptions have been reported
-     1,324,922,733      cycles                           #    2.709 GHz                    
-     2,981,261,784      instructions                     #    2.25  insn per cycle         
-       0.489729459 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2881) (512y:  110) (512z:    0)
+     1,323,043,261      cycles                           #    2.758 GHz                    
+     2,981,146,980      instructions                     #    2.25  insn per cycle         
+       0.480339840 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2895) (512y:  110) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.248846e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.351092e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.351092e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.287752e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.394431e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.394431e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.750272 sec
+TOTAL       :     0.737861 sec
 INFO: No Floating Point Exceptions have been reported
-     1,367,223,495      cycles                           #    1.814 GHz                    
-     1,990,069,192      instructions                     #    1.46  insn per cycle         
-       0.754512761 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1656) (512y:  108) (512z: 2251)
+     1,365,080,339      cycles                           #    1.841 GHz                    
+     1,989,993,648      instructions                     #    1.46  insn per cycle         
+       0.742169497 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1679) (512y:  108) (512z: 2251)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt
index a62de088c9..bc8dd367d2 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 50s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
 
-DATE: 2024-08-29_22:57:19
+DATE: 2024-09-18_12:20:39
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.682090e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.100032e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.454090e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.866883e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.866305e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.974692e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.453456 sec
+TOTAL       :     0.459058 sec
 INFO: No Floating Point Exceptions have been reported
-     1,900,785,946      cycles                           #    2.849 GHz                    
-     2,696,170,354      instructions                     #    1.42  insn per cycle         
-       0.725943157 seconds time elapsed
+     1,947,107,746      cycles                           #    2.878 GHz                    
+     2,728,462,242      instructions                     #    1.40  insn per cycle         
+       0.734017841 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.329235e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.987947e+07                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.373459e+07                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.016948e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.370809e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.575747e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.602505e+02 +- 2.116328e+02 )  GeV^-2
-TOTAL       :     0.531321 sec
+TOTAL       :     0.539080 sec
 INFO: No Floating Point Exceptions have been reported
-     2,203,065,418      cycles                           #    2.864 GHz                    
-     3,159,908,120      instructions                     #    1.43  insn per cycle         
-       0.826130785 seconds time elapsed
+     2,244,664,779      cycles                           #    2.884 GHz                    
+     3,243,168,469      instructions                     #    1.44  insn per cycle         
+       0.835323761 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.034563e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.056930e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.056930e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.054078e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.076959e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.076959e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     1.603724 sec
+TOTAL       :     1.574290 sec
 INFO: No Floating Point Exceptions have been reported
-     4,646,351,391      cycles                           #    2.891 GHz                    
-    13,165,982,220      instructions                     #    2.83  insn per cycle         
-       1.607855644 seconds time elapsed
+     4,646,036,617      cycles                           #    2.945 GHz                    
+    13,166,645,489      instructions                     #    2.83  insn per cycle         
+       1.578550564 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  666) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.860303e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.930543e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.930543e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.873438e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.944671e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.944671e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.899725 sec
+TOTAL       :     0.893595 sec
 INFO: No Floating Point Exceptions have been reported
-     2,640,401,361      cycles                           #    2.923 GHz                    
-     7,476,357,327      instructions                     #    2.83  insn per cycle         
-       0.904022044 seconds time elapsed
+     2,639,674,089      cycles                           #    2.942 GHz                    
+     7,474,954,292      instructions                     #    2.83  insn per cycle         
+       0.897961439 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3141) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.187408e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.395650e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.395650e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.194979e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.406933e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.406933e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.533442 sec
+TOTAL       :     0.532401 sec
 INFO: No Floating Point Exceptions have been reported
-     1,473,667,938      cycles                           #    2.744 GHz                    
-     3,127,923,561      instructions                     #    2.12  insn per cycle         
-       0.537621751 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3097) (512y:    0) (512z:    0)
+     1,471,043,256      cycles                           #    2.744 GHz                    
+     3,127,494,333      instructions                     #    2.13  insn per cycle         
+       0.536715670 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3111) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.577590e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.837843e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.837843e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.604804e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.871054e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.871054e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.477166 sec
+TOTAL       :     0.474052 sec
 INFO: No Floating Point Exceptions have been reported
-     1,321,328,150      cycles                           #    2.748 GHz                    
-     2,982,033,810      instructions                     #    2.26  insn per cycle         
-       0.481493993 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2857) (512y:  110) (512z:    0)
+     1,321,700,799      cycles                           #    2.767 GHz                    
+     2,981,907,836      instructions                     #    2.26  insn per cycle         
+       0.478334854 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2871) (512y:  110) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0]
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.247156e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.351150e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.351150e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.246259e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.348752e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.348752e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.018083e+01 +- 1.429474e+01 )  GeV^-2
-TOTAL       :     0.750754 sec
+TOTAL       :     0.751218 sec
 INFO: No Floating Point Exceptions have been reported
-     1,365,868,869      cycles                           #    1.811 GHz                    
-     1,990,498,794      instructions                     #    1.46  insn per cycle         
-       0.754982203 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1632) (512y:  108) (512z: 2251)
+     1,373,432,632      cycles                           #    1.819 GHz                    
+     1,989,927,175      instructions                     #    1.45  insn per cycle         
+       0.755614240 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1655) (512y:  108) (512z: 2251)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
index 5974fe5c05..6ae2d07b8c 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 07s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-30_00:10:03
+DATE: 2024-09-18_13:35:28
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.565771e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.087963e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.183774e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.333836e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.844165e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.406248e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     0.527958 sec
+TOTAL       :     0.534496 sec
 INFO: No Floating Point Exceptions have been reported
-     2,183,035,709      cycles                           #    2.857 GHz                    
-     3,157,151,269      instructions                     #    1.45  insn per cycle         
-       0.821528267 seconds time elapsed
+     2,180,885,043      cycles                           #    2.827 GHz                    
+     3,135,152,783      instructions                     #    1.44  insn per cycle         
+       0.828766444 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 226
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 228
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.616617e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.653100e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.653100e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.605100e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.641462e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.641462e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     6.596281 sec
+TOTAL       :     6.643809 sec
 INFO: No Floating Point Exceptions have been reported
-    19,282,941,048      cycles                           #    2.921 GHz                    
-    51,926,972,074      instructions                     #    2.69  insn per cycle         
-       6.601945823 seconds time elapsed
+    19,303,523,142      cycles                           #    2.904 GHz                    
+    51,922,542,271      instructions                     #    2.69  insn per cycle         
+       6.649309354 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  668) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.899605e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.029471e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.029471e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.864838e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.993187e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.993187e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.728280 sec
+TOTAL       :     3.771971 sec
 INFO: No Floating Point Exceptions have been reported
-    10,887,007,673      cycles                           #    2.916 GHz                    
-    30,780,884,538      instructions                     #    2.83  insn per cycle         
-       3.733824013 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2914) (avx2:    0) (512y:    0) (512z:    0)
+    10,899,823,947      cycles                           #    2.886 GHz                    
+    30,797,169,430      instructions                     #    2.83  insn per cycle         
+       3.777469678 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2915) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.691675e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.024979e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.024979e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.618832e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.953390e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.953390e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.344753 sec
+TOTAL       :     2.382134 sec
 INFO: No Floating Point Exceptions have been reported
-     6,446,140,954      cycles                           #    2.744 GHz                    
-    13,661,836,237      instructions                     #    2.12  insn per cycle         
-       2.350400868 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2934) (512y:    0) (512z:    0)
+     6,463,553,394      cycles                           #    2.708 GHz                    
+    13,666,010,364      instructions                     #    2.11  insn per cycle         
+       2.387555326 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2941) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.132027e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.532809e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.532809e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.007992e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.398956e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.398956e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.152562 sec
+TOTAL       :     2.205089 sec
 INFO: No Floating Point Exceptions have been reported
-     5,941,792,199      cycles                           #    2.754 GHz                    
-    13,004,857,023      instructions                     #    2.19  insn per cycle         
-       2.158116058 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2660) (512y:  146) (512z:    0)
+     5,947,846,964      cycles                           #    2.692 GHz                    
+    13,006,222,979      instructions                     #    2.19  insn per cycle         
+       2.210472243 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2667) (512y:  146) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.404422e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.577036e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.577036e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.325208e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.493799e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.493799e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.190739 sec
+TOTAL       :     3.265150 sec
 INFO: No Floating Point Exceptions have been reported
-     5,831,326,790      cycles                           #    1.825 GHz                    
-     8,584,787,842      instructions                     #    1.47  insn per cycle         
-       3.196417922 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1494) (512y:  128) (512z: 1942)
+     5,846,999,066      cycles                           #    1.789 GHz                    
+     8,588,678,582      instructions                     #    1.47  insn per cycle         
+       3.271052301 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1506) (512y:  128) (512z: 1946)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt
index adfc4bbe17..a09eaeb7bd 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 49s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-30_00:10:30
+DATE: 2024-09-18_13:35:55
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.567664e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.091591e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.187477e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.270085e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.841839e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.403601e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     0.530828 sec
+TOTAL       :     0.533386 sec
 INFO: No Floating Point Exceptions have been reported
-     2,204,760,373      cycles                           #    2.868 GHz                    
-     3,152,488,657      instructions                     #    1.43  insn per cycle         
-       0.828555127 seconds time elapsed
+     2,214,034,172      cycles                           #    2.879 GHz                    
+     3,142,399,923      instructions                     #    1.42  insn per cycle         
+       0.826419344 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 216
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.697449e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.737720e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.737720e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.706120e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.746757e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.746757e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     6.286999 sec
+TOTAL       :     6.255979 sec
 INFO: No Floating Point Exceptions have been reported
-    18,353,069,323      cycles                           #    2.917 GHz                    
-    50,057,473,820      instructions                     #    2.73  insn per cycle         
-       6.292828149 seconds time elapsed
+    18,389,967,178      cycles                           #    2.937 GHz                    
+    50,052,771,539      instructions                     #    2.72  insn per cycle         
+       6.261520945 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  626) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.072939e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.218519e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.218519e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.086242e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.232589e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.232589e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.523041 sec
+TOTAL       :     3.507867 sec
 INFO: No Floating Point Exceptions have been reported
-    10,324,590,511      cycles                           #    2.927 GHz                    
-    29,157,948,833      instructions                     #    2.82  insn per cycle         
-       3.528873583 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2732) (avx2:    0) (512y:    0) (512z:    0)
+    10,373,977,217      cycles                           #    2.954 GHz                    
+    29,174,589,795      instructions                     #    2.81  insn per cycle         
+       3.513510894 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2733) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.338289e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.619702e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.619702e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.355224e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.644479e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.644479e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.526518 sec
+TOTAL       :     2.518598 sec
 INFO: No Floating Point Exceptions have been reported
-     6,923,035,282      cycles                           #    2.735 GHz                    
-    15,145,434,537      instructions                     #    2.19  insn per cycle         
-       2.532145959 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3014) (512y:    0) (512z:    0)
+     6,982,239,473      cycles                           #    2.767 GHz                    
+    15,149,066,703      instructions                     #    2.17  insn per cycle         
+       2.524208385 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3020) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.516990e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.824523e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.824523e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.542431e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.862341e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.862341e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.431225 sec
+TOTAL       :     2.419194 sec
 INFO: No Floating Point Exceptions have been reported
-     6,666,697,943      cycles                           #    2.737 GHz                    
-    14,616,405,824      instructions                     #    2.19  insn per cycle         
-       2.436846304 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2610) (512y:  302) (512z:    0)
+     6,707,959,962      cycles                           #    2.767 GHz                    
+    14,619,001,595      instructions                     #    2.18  insn per cycle         
+       2.424680502 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2621) (512y:  302) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.295253e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.454598e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.454598e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.289276e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.449465e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.449465e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.291723 sec
+TOTAL       :     3.297038 sec
 INFO: No Floating Point Exceptions have been reported
-     6,035,530,423      cycles                           #    1.831 GHz                    
-    10,335,265,923      instructions                     #    1.71  insn per cycle         
-       3.297372600 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1256) (512y:  214) (512z: 2129)
+     6,083,017,370      cycles                           #    1.843 GHz                    
+    10,339,705,857      instructions                     #    1.70  insn per cycle         
+       3.302657897 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1268) (512y:  214) (512z: 2129)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
index 3a770e0987..50a3de8673 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 55s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-30_00:10:56
+DATE: 2024-09-18_13:36:21
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.398478e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.000098e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.167990e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.744477e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.525834e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.617286e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 7.154219e+00 +- 1.620281e-01 )  GeV^0
-TOTAL       :     0.486397 sec
+TOTAL       :     0.492808 sec
 INFO: No Floating Point Exceptions have been reported
-     2,024,122,033      cycles                           #    2.853 GHz                    
-     2,916,570,176      instructions                     #    1.44  insn per cycle         
-       0.767663174 seconds time elapsed
+     2,047,170,412      cycles                           #    2.828 GHz                    
+     2,929,586,090      instructions                     #    1.43  insn per cycle         
+       0.781904908 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 157
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 131
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.678377e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.720094e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.720094e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.678996e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.720654e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.720654e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.175644e+00 +- 1.658767e-01 )  GeV^0
-TOTAL       :     6.334718 sec
+TOTAL       :     6.333448 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    18,555,321,069      cycles                           #    2.927 GHz                    
-    51,213,843,891      instructions                     #    2.76  insn per cycle         
-       6.340063735 seconds time elapsed
+    18,607,993,167      cycles                           #    2.936 GHz                    
+    51,216,519,035      instructions                     #    2.75  insn per cycle         
+       6.339213853 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  625) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -117,20 +119,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.997779e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.258013e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.258013e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.022786e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.287209e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.287209e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.175642e+00 +- 1.658767e-01 )  GeV^0
-TOTAL       :     2.708980 sec
+TOTAL       :     2.694054 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     7,936,378,928      cycles                           #    2.926 GHz                    
-    19,315,957,189      instructions                     #    2.43  insn per cycle         
-       2.714320099 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3543) (avx2:    0) (512y:    0) (512z:    0)
+     7,934,623,764      cycles                           #    2.940 GHz                    
+    19,316,417,604      instructions                     #    2.43  insn per cycle         
+       2.699461082 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3542) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -148,20 +153,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 7.845222e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.836028e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.836028e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.880495e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.877642e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.877642e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     1.422585 sec
+TOTAL       :     1.418247 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     3,940,333,189      cycles                           #    2.761 GHz                    
-     8,829,110,284      instructions                     #    2.24  insn per cycle         
-       1.427918817 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3701) (512y:    0) (512z:    0)
+     3,951,478,174      cycles                           #    2.777 GHz                    
+     8,833,281,557      instructions                     #    2.24  insn per cycle         
+       1.423672827 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3715) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -177,20 +185,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.323802e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.447444e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.447444e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.368251e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.499225e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.499225e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     1.345781 sec
+TOTAL       :     1.339980 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     3,723,275,845      cycles                           #    2.757 GHz                    
-     8,433,230,552      instructions                     #    2.27  insn per cycle         
-       1.351128682 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3531) (512y:   20) (512z:    0)
+     3,727,978,138      cycles                           #    2.773 GHz                    
+     8,431,050,226      instructions                     #    2.26  insn per cycle         
+       1.345489073 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3541) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -206,20 +217,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.920739e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.460739e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.460739e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.964882e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.513882e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.513882e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     1.857321 sec
+TOTAL       :     1.846291 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     3,505,970,146      cycles                           #    1.883 GHz                    
-     6,241,421,267      instructions                     #    1.78  insn per cycle         
-       1.862744743 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2373) (512y:   24) (512z: 2288)
+     3,506,879,162      cycles                           #    1.895 GHz                    
+     6,243,949,016      instructions                     #    1.78  insn per cycle         
+       1.851728712 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2325) (512y:   22) (512z: 2290)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt
index 6fd24bcfe5..2b5536237c 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 50s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-30_00:11:18
+DATE: 2024-09-18_13:36:43
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.506429e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.027813e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.199337e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.958341e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.585012e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.687226e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 7.154219e+00 +- 1.620281e-01 )  GeV^0
-TOTAL       :     0.487321 sec
+TOTAL       :     0.488902 sec
 INFO: No Floating Point Exceptions have been reported
-     2,021,518,183      cycles                           #    2.846 GHz                    
-     2,916,248,031      instructions                     #    1.44  insn per cycle         
-       0.768383991 seconds time elapsed
+     2,054,269,206      cycles                           #    2.862 GHz                    
+     2,934,748,812      instructions                     #    1.43  insn per cycle         
+       0.774105073 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 131
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 125
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.723287e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.768803e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.768803e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.738704e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.782579e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.782579e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.175644e+00 +- 1.658767e-01 )  GeV^0
-TOTAL       :     6.171783 sec
+TOTAL       :     6.118201 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    18,041,103,143      cycles                           #    2.921 GHz                    
-    49,602,992,577      instructions                     #    2.75  insn per cycle         
-       6.177081980 seconds time elapsed
+    18,018,613,315      cycles                           #    2.943 GHz                    
+    49,602,263,054      instructions                     #    2.75  insn per cycle         
+       6.123752242 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  613) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -117,20 +119,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.514454e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.848137e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.848137e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.513439e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.846420e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.846420e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.175642e+00 +- 1.658767e-01 )  GeV^0
-TOTAL       :     2.409156 sec
+TOTAL       :     2.410664 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     7,065,969,827      cycles                           #    2.927 GHz                    
-    18,480,893,076      instructions                     #    2.62  insn per cycle         
-       2.414608873 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3235) (avx2:    0) (512y:    0) (512z:    0)
+     7,118,641,278      cycles                           #    2.947 GHz                    
+    18,533,207,759      instructions                     #    2.60  insn per cycle         
+       2.416130283 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3252) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -148,20 +153,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.355617e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.802219e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.802219e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.337179e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.778552e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.778552e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     2.043734 sec
+TOTAL       :     2.052609 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     5,636,114,095      cycles                           #    2.752 GHz                    
-    10,845,591,498      instructions                     #    1.92  insn per cycle         
-       2.049012424 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4260) (512y:    0) (512z:    0)
+     5,666,208,381      cycles                           #    2.754 GHz                    
+    10,850,402,094      instructions                     #    1.91  insn per cycle         
+       2.057862471 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4274) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -179,20 +187,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.424944e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.877966e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.877966e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.416639e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.866517e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.866517e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     2.018741 sec
+TOTAL       :     2.022314 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     5,551,896,925      cycles                           #    2.744 GHz                    
-    10,543,438,618      instructions                     #    1.90  insn per cycle         
-       2.024091086 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4123) (512y:   12) (512z:    0)
+     5,555,880,143      cycles                           #    2.741 GHz                    
+    10,551,186,314      instructions                     #    1.90  insn per cycle         
+       2.027927255 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 4138) (512y:   12) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -210,20 +221,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.344942e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.633588e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.633588e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.322863e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.603781e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.603781e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.198861e+00 +- 1.710281e-01 )  GeV^0
-TOTAL       :     2.500651 sec
+TOTAL       :     2.514102 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     4,636,697,064      cycles                           #    1.851 GHz                    
-     8,657,159,753      instructions                     #    1.87  insn per cycle         
-       2.506121530 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2849) (512y:    0) (512z: 2883)
+     4,668,008,181      cycles                           #    1.854 GHz                    
+     8,659,615,849      instructions                     #    1.86  insn per cycle         
+       2.519706497 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2799) (512y:    0) (512z: 2885)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
index 2259d7cf4f..3c9a7750d0 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 55s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-30_00:11:41
+DATE: 2024-09-18_13:37:07
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.538666e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.087660e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.184391e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.259037e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.833623e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.391198e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     0.527953 sec
+TOTAL       :     0.533343 sec
 INFO: No Floating Point Exceptions have been reported
-     2,183,107,430      cycles                           #    2.872 GHz                    
-     3,152,023,505      instructions                     #    1.44  insn per cycle         
-       0.817097816 seconds time elapsed
+     2,205,791,107      cycles                           #    2.867 GHz                    
+     3,166,074,888      instructions                     #    1.44  insn per cycle         
+       0.826367468 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 226
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 228
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.521018e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.553268e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.553268e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.526469e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.558963e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.558963e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     7.003787 sec
+TOTAL       :     6.979629 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    20,471,020,649      cycles                           #    2.921 GHz                    
-    51,928,444,025      instructions                     #    2.54  insn per cycle         
-       7.009468548 seconds time elapsed
+    20,509,216,850      cycles                           #    2.937 GHz                    
+    51,923,869,243      instructions                     #    2.53  insn per cycle         
+       6.985125737 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  655) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -117,20 +119,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.665203e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.774786e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.774786e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.719239e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.833565e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.833565e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     4.044714 sec
+TOTAL       :     3.966787 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    11,506,047,617      cycles                           #    2.842 GHz                    
-    30,594,015,931      instructions                     #    2.66  insn per cycle         
-       4.050341042 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2970) (avx2:    0) (512y:    0) (512z:    0)
+    11,507,632,981      cycles                           #    2.897 GHz                    
+    30,592,941,946      instructions                     #    2.66  insn per cycle         
+       3.972658763 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2972) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -148,20 +153,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.502333e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.811948e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.811948e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.525746e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.838241e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.838241e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.438893 sec
+TOTAL       :     2.427006 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     6,698,062,649      cycles                           #    2.741 GHz                    
-    13,603,721,480      instructions                     #    2.03  insn per cycle         
-       2.444469603 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3106) (512y:    0) (512z:    0)
+     6,694,021,096      cycles                           #    2.753 GHz                    
+    13,606,483,540      instructions                     #    2.03  insn per cycle         
+       2.432521216 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3118) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -179,20 +187,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.940795e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.309966e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.309966e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.956630e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.333349e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.333349e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.232647 sec
+TOTAL       :     2.225975 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     6,140,627,215      cycles                           #    2.744 GHz                    
-    12,970,097,401      instructions                     #    2.11  insn per cycle         
-       2.238426687 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2839) (512y:  150) (512z:    0)
+     6,165,401,380      cycles                           #    2.764 GHz                    
+    12,974,481,027      instructions                     #    2.10  insn per cycle         
+       2.231658259 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2851) (512y:  150) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -210,20 +221,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.083008e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.221921e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.221921e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.095455e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.237519e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.237519e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.512125 sec
+TOTAL       :     3.497825 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     6,392,916,293      cycles                           #    1.818 GHz                    
-     8,698,960,285      instructions                     #    1.36  insn per cycle         
-       3.517835253 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1769) (512y:  130) (512z: 2012)
+     6,439,450,147      cycles                           #    1.839 GHz                    
+     8,701,510,932      instructions                     #    1.35  insn per cycle         
+       3.503267717 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1792) (512y:  130) (512z: 2014)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt
index de193defe8..008d0a9d35 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 51s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
 
-DATE: 2024-08-30_00:12:09
+DATE: 2024-09-18_13:37:34
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.575688e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.092997e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.188730e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.252482e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.819370e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.388849e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     0.531136 sec
+TOTAL       :     0.535514 sec
 INFO: No Floating Point Exceptions have been reported
-     2,208,236,175      cycles                           #    2.863 GHz                    
-     3,176,316,980      instructions                     #    1.44  insn per cycle         
-       0.829765196 seconds time elapsed
+     2,214,143,603      cycles                           #    2.876 GHz                    
+     3,159,539,235      instructions                     #    1.43  insn per cycle         
+       0.828878265 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 216
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.597775e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.633618e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.633618e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.608272e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.644385e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.644385e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     6.673665 sec
+TOTAL       :     6.629223 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    19,507,572,945      cycles                           #    2.921 GHz                    
-    49,961,631,538      instructions                     #    2.56  insn per cycle         
-       6.679180336 seconds time elapsed
+    19,498,919,287      cycles                           #    2.939 GHz                    
+    49,953,158,127      instructions                     #    2.56  insn per cycle         
+       6.634747708 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4:  599) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -117,20 +119,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.893528e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.021968e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.021968e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.887478e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.016037e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.016037e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.733810 sec
+TOTAL       :     3.741933 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-    10,902,882,333      cycles                           #    2.917 GHz                    
-    29,099,087,878      instructions                     #    2.67  insn per cycle         
-       3.739349782 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2806) (avx2:    0) (512y:    0) (512z:    0)
+    11,045,759,193      cycles                           #    2.948 GHz                    
+    29,138,468,069      instructions                     #    2.64  insn per cycle         
+       3.747566884 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2815) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -148,20 +153,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.711898e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.915395e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.915395e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.735821e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.946793e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.946793e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.935036 sec
+TOTAL       :     2.917806 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     8,019,016,096      cycles                           #    2.728 GHz                    
-    15,168,124,592      instructions                     #    1.89  insn per cycle         
-       2.940665569 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3190) (512y:    0) (512z:    0)
+     8,064,126,572      cycles                           #    2.759 GHz                    
+    15,188,166,070      instructions                     #    1.88  insn per cycle         
+       2.923408860 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3203) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -179,20 +187,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.915818e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.144411e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.144411e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.934941e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.167424e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.167424e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     2.787579 sec
+TOTAL       :     2.773801 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     7,662,521,181      cycles                           #    2.744 GHz                    
-    14,477,655,900      instructions                     #    1.89  insn per cycle         
-       2.793241788 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2762) (512y:  304) (512z:    0)
+     7,685,843,393      cycles                           #    2.766 GHz                    
+    14,482,526,269      instructions                     #    1.88  insn per cycle         
+       2.779397074 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2775) (512y:  304) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -210,20 +221,23 @@ Process                     = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.007999e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.141501e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.141501e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.028557e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.163339e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.163339e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 7.148017e+00 +- 1.609110e-01 )  GeV^0
-TOTAL       :     3.595624 sec
+TOTAL       :     3.571755 sec
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-     6,539,430,615      cycles                           #    1.817 GHz                    
-     9,892,281,397      instructions                     #    1.51  insn per cycle         
-       3.601324479 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1542) (512y:  216) (512z: 2216)
+     6,530,752,454      cycles                           #    1.826 GHz                    
+     9,894,967,129      instructions                     #    1.52  insn per cycle         
+       3.577461945 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1565) (512y:  216) (512z: 2216)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
+DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
index fa7b02a6a2..052ae7ee83 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 15s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-30_00:09:01
+DATE: 2024-09-18_13:34:22
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.194339e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.220510e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.224134e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.764082e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.781890e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.785142e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.466393 sec
+TOTAL       :     0.472813 sec
 INFO: No Floating Point Exceptions have been reported
-     1,946,789,529      cycles                           #    2.848 GHz                    
-     2,840,228,076      instructions                     #    1.46  insn per cycle         
-       0.743222441 seconds time elapsed
+     1,988,958,737      cycles                           #    2.864 GHz                    
+     2,937,434,860      instructions                     #    1.48  insn per cycle         
+       0.752740146 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.849118e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.996884e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.006314e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.003017e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.119483e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.127951e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.048215e-03 +- 4.042405e-03 )  GeV^-4
-TOTAL       :     0.482333 sec
+TOTAL       :     0.489631 sec
 INFO: No Floating Point Exceptions have been reported
-     2,040,122,455      cycles                           #    2.862 GHz                    
-     3,026,821,422      instructions                     #    1.48  insn per cycle         
-       0.771290598 seconds time elapsed
+     2,045,084,255      cycles                           #    2.869 GHz                    
+     3,023,069,261      instructions                     #    1.48  insn per cycle         
+       0.771335484 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.401682e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.405003e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.405003e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.395968e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.399199e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.399199e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.157394 sec
+TOTAL       :     0.157638 sec
 INFO: No Floating Point Exceptions have been reported
-       467,966,696      cycles                           #    2.914 GHz                    
-     1,389,665,974      instructions                     #    2.97  insn per cycle         
-       0.161243043 seconds time elapsed
+       469,190,775      cycles                           #    2.915 GHz                    
+     1,389,792,831      instructions                     #    2.96  insn per cycle         
+       0.161480291 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.393464e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.405879e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.405879e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.497864e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.511372e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.511372e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.084547 sec
+TOTAL       :     0.083349 sec
 INFO: No Floating Point Exceptions have been reported
-       240,797,416      cycles                           #    2.743 GHz                    
-       692,952,810      instructions                     #    2.88  insn per cycle         
-       0.088416716 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 9483) (avx2:    0) (512y:    0) (512z:    0)
+       241,222,273      cycles                           #    2.780 GHz                    
+       693,002,253      instructions                     #    2.87  insn per cycle         
+       0.087370180 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 9482) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.416012e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.421968e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.421968e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.431164e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.437397e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.437397e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.039303 sec
+TOTAL       :     0.038955 sec
 INFO: No Floating Point Exceptions have been reported
-       114,539,539      cycles                           #    2.685 GHz                    
-       257,865,602      instructions                     #    2.25  insn per cycle         
-       0.043139255 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8496) (512y:    0) (512z:    0)
+       115,308,474      cycles                           #    2.709 GHz                    
+       257,920,071      instructions                     #    2.24  insn per cycle         
+       0.043236547 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8501) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.621088e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.629157e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.629157e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.580017e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.587312e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.587312e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.034613 sec
+TOTAL       :     0.035473 sec
 INFO: No Floating Point Exceptions have been reported
-       102,700,525      cycles                           #    2.709 GHz                    
-       239,920,525      instructions                     #    2.34  insn per cycle         
-       0.038479326 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8133) (512y:  150) (512z:    0)
+       102,969,893      cycles                           #    2.655 GHz                    
+       240,051,517      instructions                     #    2.33  insn per cycle         
+       0.039391596 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8143) (512y:  150) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.200711e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.205895e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.205895e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.194413e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.199659e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.199659e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.046254 sec
+TOTAL       :     0.046450 sec
 INFO: No Floating Point Exceptions have been reported
-        90,180,411      cycles                           #    1.814 GHz                    
-       134,242,334      instructions                     #    1.49  insn per cycle         
-       0.050311483 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1931) (512y:  126) (512z: 7089)
+        90,344,224      cycles                           #    1.811 GHz                    
+       134,320,028      instructions                     #    1.49  insn per cycle         
+       0.050486009 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1943) (512y:  126) (512z: 7086)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt
index a8b8f29f83..a192f75604 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 56s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-30_00:09:11
+DATE: 2024-09-18_13:34:33
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.239289e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.264501e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.268217e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.801517e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.819462e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.822714e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.464414 sec
+TOTAL       :     0.476112 sec
 INFO: No Floating Point Exceptions have been reported
-     1,976,573,019      cycles                           #    2.857 GHz                    
-     2,840,849,000      instructions                     #    1.44  insn per cycle         
-       0.748390902 seconds time elapsed
+     2,002,117,199      cycles                           #    2.852 GHz                    
+     2,866,160,766      instructions                     #    1.43  insn per cycle         
+       0.760468280 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.945199e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.094050e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.103648e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.078707e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.191865e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.200143e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.048215e-03 +- 4.042405e-03 )  GeV^-4
-TOTAL       :     0.483417 sec
+TOTAL       :     0.489613 sec
 INFO: No Floating Point Exceptions have been reported
-     2,042,844,461      cycles                           #    2.868 GHz                    
-     3,015,415,868      instructions                     #    1.48  insn per cycle         
-       0.770568338 seconds time elapsed
+     2,042,241,108      cycles                           #    2.868 GHz                    
+     2,998,598,647      instructions                     #    1.47  insn per cycle         
+       0.772399138 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.312575e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.315809e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.315809e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.407520e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.410948e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.410948e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.160867 sec
+TOTAL       :     0.156563 sec
 INFO: No Floating Point Exceptions have been reported
-       466,639,598      cycles                           #    2.839 GHz                    
-     1,385,046,855      instructions                     #    2.97  insn per cycle         
-       0.164971440 seconds time elapsed
+       466,584,758      cycles                           #    2.920 GHz                    
+     1,385,250,664      instructions                     #    2.97  insn per cycle         
+       0.160376464 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.419167e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.432939e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.432939e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.449696e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.462962e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.462962e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.083367 sec
+TOTAL       :     0.083015 sec
 INFO: No Floating Point Exceptions have been reported
-       239,228,784      cycles                           #    2.761 GHz                    
-       689,068,045      instructions                     #    2.88  insn per cycle         
-       0.087228912 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 9528) (avx2:    0) (512y:    0) (512z:    0)
+       239,636,465      cycles                           #    2.770 GHz                    
+       689,080,119      instructions                     #    2.88  insn per cycle         
+       0.087201828 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 9525) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.403646e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.409530e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.409530e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.414254e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.419861e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.419861e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.038935 sec
+TOTAL       :     0.038646 sec
 INFO: No Floating Point Exceptions have been reported
-       112,606,036      cycles                           #    2.660 GHz                    
-       253,498,964      instructions                     #    2.25  insn per cycle         
-       0.042949663 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8451) (512y:    0) (512z:    0)
+       111,994,100      cycles                           #    2.669 GHz                    
+       253,518,298      instructions                     #    2.26  insn per cycle         
+       0.042520952 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8457) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.592122e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.599776e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.599776e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.642367e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.650155e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.650155e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.034497 sec
+TOTAL       :     0.033489 sec
 INFO: No Floating Point Exceptions have been reported
-       100,939,846      cycles                           #    2.666 GHz                    
-       235,610,346      instructions                     #    2.33  insn per cycle         
-       0.038438765 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8091) (512y:  150) (512z:    0)
+       100,655,003      cycles                           #    2.733 GHz                    
+       235,667,417      instructions                     #    2.34  insn per cycle         
+       0.037423166 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8101) (512y:  150) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.201717e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.206827e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.206827e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.198873e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.203973e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.203973e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.045438 sec
+TOTAL       :     0.045540 sec
 INFO: No Floating Point Exceptions have been reported
-        88,129,895      cycles                           #    1.803 GHz                    
-       129,668,800      instructions                     #    1.47  insn per cycle         
-       0.049473921 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1887) (512y:  126) (512z: 7093)
+        88,110,981      cycles                           #    1.799 GHz                    
+       129,713,745      instructions                     #    1.47  insn per cycle         
+       0.049588057 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1899) (512y:  126) (512z: 7084)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
index b30fdb0a04..0a43242226 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 01s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-30_00:09:22
+DATE: 2024-09-18_13:34:44
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.439642e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.450633e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.453577e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.214942e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.224129e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.226295e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.188141e-04 +- 6.565202e-04 )  GeV^-4
-TOTAL       :     0.468424 sec
+TOTAL       :     0.475163 sec
 INFO: No Floating Point Exceptions have been reported
-     1,979,854,233      cycles                           #    2.856 GHz                    
-     2,835,931,601      instructions                     #    1.43  insn per cycle         
-       0.750354883 seconds time elapsed
+     1,995,760,495      cycles                           #    2.876 GHz                    
+     2,898,607,116      instructions                     #    1.45  insn per cycle         
+       0.751350588 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.136161e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.244237e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.264717e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.954269e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.031370e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.039107e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.020494e-03 +- 4.025605e-03 )  GeV^-4
-TOTAL       :     0.471781 sec
+TOTAL       :     0.476219 sec
 INFO: No Floating Point Exceptions have been reported
-     1,955,602,473      cycles                           #    2.852 GHz                    
-     2,851,428,736      instructions                     #    1.46  insn per cycle         
-       0.744473958 seconds time elapsed
+     1,999,149,645      cycles                           #    2.878 GHz                    
+     2,913,422,324      instructions                     #    1.46  insn per cycle         
+       0.751593441 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.431844e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.435246e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.435246e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.411294e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.414706e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.414706e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.177153e-04 +- 6.554185e-04 )  GeV^-4
-TOTAL       :     0.156051 sec
+TOTAL       :     0.156868 sec
 INFO: No Floating Point Exceptions have been reported
-       464,275,904      cycles                           #    2.913 GHz                    
-     1,381,903,168      instructions                     #    2.98  insn per cycle         
-       0.159971759 seconds time elapsed
+       464,525,374      cycles                           #    2.900 GHz                    
+     1,382,008,460      instructions                     #    2.98  insn per cycle         
+       0.160803882 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.210164e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.214685e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.214685e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.203598e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.208165e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.208165e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.177152e-04 +- 6.554185e-04 )  GeV^-4
-TOTAL       :     0.045622 sec
+TOTAL       :     0.045866 sec
 INFO: No Floating Point Exceptions have been reported
-       133,102,979      cycles                           #    2.717 GHz                    
-       372,002,572      instructions                     #    2.79  insn per cycle         
-       0.049577231 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:10140) (avx2:    0) (512y:    0) (512z:    0)
+       133,138,155      cycles                           #    2.706 GHz                    
+       372,169,369      instructions                     #    2.80  insn per cycle         
+       0.049817482 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:10141) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.685154e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.706602e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.706602e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.784499e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.809977e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.809977e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165746e-04 +- 6.542823e-04 )  GeV^-4
-TOTAL       :     0.021709 sec
+TOTAL       :     0.020924 sec
 INFO: No Floating Point Exceptions have been reported
-        65,573,769      cycles                           #    2.621 GHz                    
-       142,812,899      instructions                     #    2.18  insn per cycle         
-       0.025544258 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 9237) (512y:    0) (512z:    0)
+        65,424,959      cycles                           #    2.700 GHz                    
+       142,812,066      instructions                     #    2.18  insn per cycle         
+       0.024819725 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 9241) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.090980e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.120143e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.120143e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.962557e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.993097e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.993097e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165746e-04 +- 6.542823e-04 )  GeV^-4
-TOTAL       :     0.019209 sec
+TOTAL       :     0.019867 sec
 INFO: No Floating Point Exceptions have been reported
-        60,449,624      cycles                           #    2.693 GHz                    
-       132,675,328      instructions                     #    2.19  insn per cycle         
-       0.023011897 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8951) (512y:   28) (512z:    0)
+        60,581,334      cycles                           #    2.611 GHz                    
+       132,865,474      instructions                     #    2.19  insn per cycle         
+       0.023738141 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8959) (512y:   28) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.349292e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.369797e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.369797e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.316896e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.339579e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.339579e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165747e-04 +- 6.542824e-04 )  GeV^-4
-TOTAL       :     0.024693 sec
+TOTAL       :     0.024992 sec
 INFO: No Floating Point Exceptions have been reported
-        52,590,410      cycles                           #    1.867 GHz                    
-        79,499,137      instructions                     #    1.51  insn per cycle         
-       0.028818231 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2813) (512y:   32) (512z: 7440)
+        52,575,011      cycles                           #    1.850 GHz                    
+        79,563,519      instructions                     #    1.51  insn per cycle         
+       0.029028726 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2836) (512y:   30) (512z: 7437)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt
index 7be61f3fb7..81fec428b9 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 55s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-30_00:09:32
+DATE: 2024-09-18_13:34:55
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 2.480611e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.492183e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.494882e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.237744e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.247254e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.249233e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.188141e-04 +- 6.565202e-04 )  GeV^-4
-TOTAL       :     0.466999 sec
+TOTAL       :     0.475021 sec
 INFO: No Floating Point Exceptions have been reported
-     1,994,032,599      cycles                           #    2.869 GHz                    
-     2,904,635,002      instructions                     #    1.46  insn per cycle         
-       0.752109461 seconds time elapsed
+     1,993,743,022      cycles                           #    2.872 GHz                    
+     2,918,324,117      instructions                     #    1.46  insn per cycle         
+       0.750958800 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.230787e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.343689e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.354925e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.067375e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.148140e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.156186e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.020496e-03 +- 4.025606e-03 )  GeV^-4
-TOTAL       :     0.468535 sec
+TOTAL       :     0.476461 sec
 INFO: No Floating Point Exceptions have been reported
-     1,974,074,352      cycles                           #    2.858 GHz                    
-     2,854,228,340      instructions                     #    1.45  insn per cycle         
-       0.747765066 seconds time elapsed
+     1,993,725,610      cycles                           #    2.868 GHz                    
+     2,900,779,066      instructions                     #    1.45  insn per cycle         
+       0.752726088 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.428329e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.431990e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.431990e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.438799e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.442175e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.442175e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.177153e-04 +- 6.554185e-04 )  GeV^-4
-TOTAL       :     0.155328 sec
+TOTAL       :     0.155006 sec
 INFO: No Floating Point Exceptions have been reported
-       461,794,147      cycles                           #    2.912 GHz                    
-     1,376,816,114      instructions                     #    2.98  insn per cycle         
-       0.159149031 seconds time elapsed
+       462,147,018      cycles                           #    2.920 GHz                    
+     1,376,798,562      instructions                     #    2.98  insn per cycle         
+       0.158894971 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.207284e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.211683e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.211683e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.224501e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.229267e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.229267e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.177152e-04 +- 6.554185e-04 )  GeV^-4
-TOTAL       :     0.044898 sec
+TOTAL       :     0.044280 sec
 INFO: No Floating Point Exceptions have been reported
-       130,815,214      cycles                           #    2.714 GHz                    
-       367,168,421      instructions                     #    2.81  insn per cycle         
-       0.048774302 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:10123) (avx2:    0) (512y:    0) (512z:    0)
+       130,643,774      cycles                           #    2.744 GHz                    
+       367,253,267      instructions                     #    2.81  insn per cycle         
+       0.048214582 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:10124) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.693057e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.717106e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.717106e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.785213e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.809806e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.809806e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165746e-04 +- 6.542823e-04 )  GeV^-4
-TOTAL       :     0.020927 sec
+TOTAL       :     0.020207 sec
 INFO: No Floating Point Exceptions have been reported
-        63,720,009      cycles                           #    2.625 GHz                    
-       137,963,649      instructions                     #    2.17  insn per cycle         
-       0.024841609 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 9191) (512y:    0) (512z:    0)
+        63,247,605      cycles                           #    2.692 GHz                    
+       138,006,301      instructions                     #    2.18  insn per cycle         
+       0.024065097 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 9196) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.053369e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.084432e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.084432e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.053192e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.081685e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.081685e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165746e-04 +- 6.542823e-04 )  GeV^-4
-TOTAL       :     0.018533 sec
+TOTAL       :     0.018541 sec
 INFO: No Floating Point Exceptions have been reported
-        58,283,780      cycles                           #    2.668 GHz                    
-       127,986,844      instructions                     #    2.20  insn per cycle         
-       0.022433226 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8907) (512y:   28) (512z:    0)
+        58,249,945      cycles                           #    2.668 GHz                    
+       127,981,629      instructions                     #    2.20  insn per cycle         
+       0.022408862 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8910) (512y:   28) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.325306e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.346291e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.346291e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.336383e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.358299e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.358299e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.165747e-04 +- 6.542824e-04 )  GeV^-4
-TOTAL       :     0.024145 sec
+TOTAL       :     0.024035 sec
 INFO: No Floating Point Exceptions have been reported
-        50,396,566      cycles                           #    1.827 GHz                    
-        74,723,558      instructions                     #    1.48  insn per cycle         
-       0.028196285 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2768) (512y:   32) (512z: 7442)
+        50,478,559      cycles                           #    1.838 GHz                    
+        74,763,022      instructions                     #    1.48  insn per cycle         
+       0.028059996 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2791) (512y:   30) (512z: 7439)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
index 56a82d3822..59d9b0aed3 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 04s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-30_00:09:42
+DATE: 2024-09-18_13:35:06
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.176803e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.200380e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.203980e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.754823e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.776415e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.779447e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.463813 sec
+TOTAL       :     0.471377 sec
 INFO: No Floating Point Exceptions have been reported
-     1,994,126,897      cycles                           #    2.870 GHz                    
-     2,883,414,328      instructions                     #    1.45  insn per cycle         
-       0.751549273 seconds time elapsed
+     1,997,323,985      cycles                           #    2.874 GHz                    
+     2,899,694,458      instructions                     #    1.45  insn per cycle         
+       0.752307454 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.815791e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.963506e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.977197e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.948061e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.061017e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.069565e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.048215e-03 +- 4.042405e-03 )  GeV^-4
-TOTAL       :     0.480482 sec
+TOTAL       :     0.489196 sec
 INFO: No Floating Point Exceptions have been reported
-     2,017,162,366      cycles                           #    2.863 GHz                    
-     2,881,486,208      instructions                     #    1.43  insn per cycle         
-       0.761315241 seconds time elapsed
+     2,036,665,309      cycles                           #    2.870 GHz                    
+     3,021,584,007      instructions                     #    1.48  insn per cycle         
+       0.771275990 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.299636e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.302850e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.302850e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.346871e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.350343e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.350343e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.162201 sec
+TOTAL       :     0.160031 sec
 INFO: No Floating Point Exceptions have been reported
-       473,962,179      cycles                           #    2.864 GHz                    
-     1,398,561,506      instructions                     #    2.95  insn per cycle         
-       0.166136124 seconds time elapsed
+       472,933,421      cycles                           #    2.893 GHz                    
+     1,398,381,136      instructions                     #    2.96  insn per cycle         
+       0.164085482 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.462459e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.474456e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.474456e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.641661e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.653702e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.653702e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.083663 sec
+TOTAL       :     0.081430 sec
 INFO: No Floating Point Exceptions have been reported
-       238,006,925      cycles                           #    2.737 GHz                    
-       688,241,004      instructions                     #    2.89  insn per cycle         
-       0.087669570 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 9327) (avx2:    0) (512y:    0) (512z:    0)
+       237,272,954      cycles                           #    2.797 GHz                    
+       688,192,491      instructions                     #    2.90  insn per cycle         
+       0.085340914 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 9334) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.408225e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.414801e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.414801e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.416781e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.422580e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.422580e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.039583 sec
+TOTAL       :     0.039309 sec
 INFO: No Floating Point Exceptions have been reported
-       113,789,328      cycles                           #    2.650 GHz                    
-       253,028,199      instructions                     #    2.22  insn per cycle         
-       0.043523208 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8351) (512y:    0) (512z:    0)
+       114,214,565      cycles                           #    2.672 GHz                    
+       253,122,283      instructions                     #    2.22  insn per cycle         
+       0.043386095 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8363) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.638322e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.646577e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.646577e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.596060e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.604256e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.604256e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.034260 sec
+TOTAL       :     0.035081 sec
 INFO: No Floating Point Exceptions have been reported
-       101,355,150      cycles                           #    2.697 GHz                    
-       233,580,541      instructions                     #    2.30  insn per cycle         
-       0.038214462 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 7489) (512y:  146) (512z:    0)
+       101,856,642      cycles                           #    2.646 GHz                    
+       233,656,157      instructions                     #    2.29  insn per cycle         
+       0.039147600 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 7501) (512y:  146) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.177279e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.182707e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.182707e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.146549e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.151691e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.151691e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.047055 sec
+TOTAL       :     0.048254 sec
 INFO: No Floating Point Exceptions have been reported
-        91,140,824      cycles                           #    1.806 GHz                    
-       133,104,919      instructions                     #    1.46  insn per cycle         
-       0.051102614 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2061) (512y:  122) (512z: 6355)
+        91,587,165      cycles                           #    1.768 GHz                    
+       133,174,500      instructions                     #    1.45  insn per cycle         
+       0.052446048 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2084) (512y:  122) (512z: 6354)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt
index f8c6c4c9fb..6686b30b4b 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 55s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
 
-DATE: 2024-08-30_00:09:53
+DATE: 2024-09-18_13:35:17
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.215311e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.239767e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.243700e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.784162e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.808686e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.813201e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.461734 sec
+TOTAL       :     0.333667 sec
 INFO: No Floating Point Exceptions have been reported
-     1,988,470,447      cycles                           #    2.864 GHz                    
-     2,882,361,412      instructions                     #    1.45  insn per cycle         
-       0.750779396 seconds time elapsed
+     1,240,871,647      cycles                           #    2.848 GHz                    
+     2,449,109,840      instructions                     #    1.97  insn per cycle         
+       0.615625688 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -71,19 +67,22 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 7.928262e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.076946e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.087118e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 7.062992e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.177330e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.186889e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 8.048215e-03 +- 4.042405e-03 )  GeV^-4
-TOTAL       :     0.480694 sec
+TOTAL       :     0.492547 sec
 INFO: No Floating Point Exceptions have been reported
-     2,042,603,092      cycles                           #    2.854 GHz                    
-     2,995,512,508      instructions                     #    1.47  insn per cycle         
-       0.772035435 seconds time elapsed
+     2,036,892,131      cycles                           #    2.842 GHz                    
+     3,016,344,751      instructions                     #    1.48  insn per cycle         
+       0.776057336 seconds time elapsed
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -101,20 +100,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 3.350374e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.354102e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.354102e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.396244e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.399563e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.399563e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.158989 sec
+TOTAL       :     0.156900 sec
 INFO: No Floating Point Exceptions have been reported
-       469,990,230      cycles                           #    2.882 GHz                    
-     1,393,593,071      instructions                     #    2.97  insn per cycle         
-       0.163678021 seconds time elapsed
+       468,878,349      cycles                           #    2.927 GHz                    
+     1,393,744,642      instructions                     #    2.97  insn per cycle         
+       0.160773641 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -130,20 +132,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.649152e+03                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.662415e+03                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.662415e+03                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.703638e+03                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.716215e+03                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.716215e+03                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.080671 sec
+TOTAL       :     0.080002 sec
 INFO: No Floating Point Exceptions have been reported
-       235,246,937      cycles                           #    2.800 GHz                    
-       684,070,397      instructions                     #    2.91  insn per cycle         
-       0.084583044 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 9360) (avx2:    0) (512y:    0) (512z:    0)
+       235,588,650      cycles                           #    2.827 GHz                    
+       684,259,138      instructions                     #    2.90  insn per cycle         
+       0.083821193 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 9368) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -159,20 +164,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.433240e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.439350e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.439350e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.433569e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.439450e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.439450e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.038091 sec
+TOTAL       :     0.038120 sec
 INFO: No Floating Point Exceptions have been reported
-       111,471,077      cycles                           #    2.695 GHz                    
-       248,563,445      instructions                     #    2.23  insn per cycle         
-       0.041951935 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8304) (512y:    0) (512z:    0)
+       111,841,703      cycles                           #    2.696 GHz                    
+       248,650,538      instructions                     #    2.22  insn per cycle         
+       0.042017351 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 8316) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -188,20 +196,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.616149e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.623778e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.623778e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.614208e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.621785e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.621785e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.033986 sec
+TOTAL       :     0.034000 sec
 INFO: No Floating Point Exceptions have been reported
-        99,756,711      cycles                           #    2.656 GHz                    
-       229,195,549      instructions                     #    2.30  insn per cycle         
-       0.038182932 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 7440) (512y:  146) (512z:    0)
+        99,535,427      cycles                           #    2.668 GHz                    
+       229,238,314      instructions                     #    2.30  insn per cycle         
+       0.037858332 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 7452) (512y:  146) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -217,20 +228,23 @@ Process                     = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.146301e+04                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.151526e+04                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.151526e+04                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.195361e+04                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.200436e+04                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.200436e+04                 )  sec^-1
 MeanMatrixElemValue         = ( 7.185537e-04 +- 6.562553e-04 )  GeV^-4
-TOTAL       :     0.047514 sec
+TOTAL       :     0.045690 sec
 INFO: No Floating Point Exceptions have been reported
-        89,067,537      cycles                           #    1.748 GHz                    
-       128,503,438      instructions                     #    1.44  insn per cycle         
-       0.051548715 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2012) (512y:  122) (512z: 6355)
+        89,777,680      cycles                           #    1.821 GHz                    
+       128,604,385      instructions                     #    1.43  insn per cycle         
+       0.049950768 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2035) (512y:  122) (512z: 6355)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
index 8cc24156a7..62aa2351ef 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 17s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-30_00:07:50
+DATE: 2024-09-18_13:33:10
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.656669e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.778365e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.356760e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.107848e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.349751e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.801252e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.518235 sec
+TOTAL       :     0.525011 sec
 INFO: No Floating Point Exceptions have been reported
-     2,154,268,990      cycles                           #    2.871 GHz                    
-     3,045,863,111      instructions                     #    1.41  insn per cycle         
-       0.808861715 seconds time elapsed
+     2,191,922,668      cycles                           #    2.881 GHz                    
+     3,124,854,662      instructions                     #    1.43  insn per cycle         
+       0.820527123 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 132
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 130
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.039451e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.032202e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.032202e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.117531e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.040993e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.040993e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     1.272909 sec
+TOTAL       :     1.263727 sec
 INFO: No Floating Point Exceptions have been reported
-     3,733,770,917      cycles                           #    2.921 GHz                    
-     9,720,651,198      instructions                     #    2.60  insn per cycle         
-       1.278835422 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  338) (avx2:    0) (512y:    0) (512z:    0)
+     3,735,375,700      cycles                           #    2.944 GHz                    
+     9,727,971,651      instructions                     #    2.60  insn per cycle         
+       1.269703149 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  341) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.503983e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.929451e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.929451e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.512691e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.947484e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.947484e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.811462 sec
+TOTAL       :     0.807869 sec
 INFO: No Floating Point Exceptions have been reported
-     2,332,671,266      cycles                           #    2.856 GHz                    
-     5,927,947,879      instructions                     #    2.54  insn per cycle         
-       0.817546181 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1376) (avx2:    0) (512y:    0) (512z:    0)
+     2,332,400,363      cycles                           #    2.869 GHz                    
+     5,932,883,831      instructions                     #    2.54  insn per cycle         
+       0.813712795 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.225656e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.249963e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.249963e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.185960e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.183533e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.183533e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.584802 sec
+TOTAL       :     0.594661 sec
 INFO: No Floating Point Exceptions have been reported
-     1,652,529,819      cycles                           #    2.801 GHz                    
-     3,311,190,823      instructions                     #    2.00  insn per cycle         
-       0.590668347 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1492) (512y:    0) (512z:    0)
+     1,663,371,411      cycles                           #    2.773 GHz                    
+     3,314,486,720      instructions                     #    1.99  insn per cycle         
+       0.600516021 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1499) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.303666e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.389768e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.389768e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.219367e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.251513e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.251513e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.568352 sec
+TOTAL       :     0.587632 sec
 INFO: No Floating Point Exceptions have been reported
-     1,605,893,876      cycles                           #    2.800 GHz                    
-     3,280,964,739      instructions                     #    2.04  insn per cycle         
-       0.574230340 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1368) (512y:   96) (512z:    0)
+     1,614,839,496      cycles                           #    2.724 GHz                    
+     3,284,546,277      instructions                     #    2.03  insn per cycle         
+       0.593339482 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1375) (512y:   96) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.105549e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.103411e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.103411e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.129616e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.055946e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.055946e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.611625 sec
+TOTAL       :     0.606967 sec
 INFO: No Floating Point Exceptions have been reported
-     1,388,086,070      cycles                           #    2.251 GHz                    
-     2,420,953,576      instructions                     #    1.74  insn per cycle         
-       0.617361867 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  568) (512y:   60) (512z: 1020)
+     1,366,903,692      cycles                           #    2.234 GHz                    
+     2,424,948,880      instructions                     #    1.77  insn per cycle         
+       0.612713832 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  580) (512y:   60) (512z: 1021)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt
index 10a294cce6..239bb47b8a 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 53s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-30_00:08:02
+DATE: 2024-09-18_13:33:22
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.790264e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.282286e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.700064e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.181260e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.490249e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.991797e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.519308 sec
+TOTAL       :     0.525256 sec
 INFO: No Floating Point Exceptions have been reported
-     2,148,852,822      cycles                           #    2.856 GHz                    
-     3,052,949,716      instructions                     #    1.42  insn per cycle         
-       0.810262458 seconds time elapsed
+     2,186,851,153      cycles                           #    2.864 GHz                    
+     3,107,286,620      instructions                     #    1.42  insn per cycle         
+       0.822741231 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.116112e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.042418e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.042418e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.043560e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.033362e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.033362e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     1.261174 sec
+TOTAL       :     1.273177 sec
 INFO: No Floating Point Exceptions have been reported
-     3,716,003,421      cycles                           #    2.935 GHz                    
-     9,602,402,440      instructions                     #    2.58  insn per cycle         
-       1.266900529 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  356) (avx2:    0) (512y:    0) (512z:    0)
+     3,715,871,529      cycles                           #    2.906 GHz                    
+     9,610,590,320      instructions                     #    2.59  insn per cycle         
+       1.279195540 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  359) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.438745e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.829743e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.829743e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.470593e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.877997e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.877997e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.843334 sec
+TOTAL       :     0.826368 sec
 INFO: No Floating Point Exceptions have been reported
-     2,333,178,489      cycles                           #    2.750 GHz                    
-     5,873,605,419      instructions                     #    2.52  insn per cycle         
-       0.849150055 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1342) (avx2:    0) (512y:    0) (512z:    0)
+     2,333,894,912      cycles                           #    2.807 GHz                    
+     5,878,357,831      instructions                     #    2.52  insn per cycle         
+       0.832251124 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1340) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.237965e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.263926e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.263926e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.242144e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.308218e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.308218e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.580296 sec
+TOTAL       :     0.580800 sec
 INFO: No Floating Point Exceptions have been reported
-     1,642,724,354      cycles                           #    2.807 GHz                    
-     3,283,919,836      instructions                     #    2.00  insn per cycle         
-       0.585984683 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1429) (512y:    0) (512z:    0)
+     1,655,777,920      cycles                           #    2.827 GHz                    
+     3,287,720,584      instructions                     #    1.99  insn per cycle         
+       0.586391271 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1436) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.312622e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.424487e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.424487e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.289151e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.391409e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.391409e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.567544 sec
+TOTAL       :     0.570490 sec
 INFO: No Floating Point Exceptions have been reported
-     1,607,071,100      cycles                           #    2.807 GHz                    
-     3,257,865,615      instructions                     #    2.03  insn per cycle         
-       0.573398911 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1321) (512y:   96) (512z:    0)
+     1,622,799,576      cycles                           #    2.819 GHz                    
+     3,260,934,090      instructions                     #    2.01  insn per cycle         
+       0.576408659 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1328) (512y:   96) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.175885e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.130445e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.130445e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.147175e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.094895e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.094895e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.594395 sec
+TOTAL       :     0.601115 sec
 INFO: No Floating Point Exceptions have been reported
-     1,365,120,671      cycles                           #    2.277 GHz                    
-     2,405,716,159      instructions                     #    1.76  insn per cycle         
-       0.600141906 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  535) (512y:   60) (512z: 1006)
+     1,376,859,663      cycles                           #    2.272 GHz                    
+     2,409,979,343      instructions                     #    1.75  insn per cycle         
+       0.607114374 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  547) (512y:   60) (512z: 1007)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
index bba9b80c38..d290e84a6a 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 02s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-30_00:08:14
+DATE: 2024-09-18_13:33:34
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.266123e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.010227e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.720164e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.032821e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.078089e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.480611e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486732e-01 +- 3.293572e-05 )  GeV^0
-TOTAL       :     0.480550 sec
+TOTAL       :     0.485441 sec
 INFO: No Floating Point Exceptions have been reported
-     2,017,865,131      cycles                           #    2.862 GHz                    
-     2,870,566,692      instructions                     #    1.42  insn per cycle         
-       0.762778204 seconds time elapsed
+     2,051,454,700      cycles                           #    2.873 GHz                    
+     2,936,249,934      instructions                     #    1.43  insn per cycle         
+       0.771058253 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 100
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 97
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.059132e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.039599e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.039599e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.100530e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.045913e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.045913e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293563e-05 )  GeV^0
-TOTAL       :     1.247370 sec
+TOTAL       :     1.244009 sec
 INFO: No Floating Point Exceptions have been reported
-     3,640,038,117      cycles                           #    2.907 GHz                    
-     9,595,863,482      instructions                     #    2.64  insn per cycle         
-       1.252756564 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  462) (avx2:    0) (512y:    0) (512z:    0)
+     3,662,603,595      cycles                           #    2.932 GHz                    
+     9,601,734,780      instructions                     #    2.62  insn per cycle         
+       1.249887433 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  463) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.211831e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.341257e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.341257e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.260293e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.450195e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.450195e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293563e-05 )  GeV^0
-TOTAL       :     0.563087 sec
+TOTAL       :     0.554122 sec
 INFO: No Floating Point Exceptions have been reported
-     1,628,390,678      cycles                           #    2.868 GHz                    
-     3,963,449,124      instructions                     #    2.43  insn per cycle         
-       0.568604349 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1578) (avx2:    0) (512y:    0) (512z:    0)
+     1,637,956,120      cycles                           #    2.928 GHz                    
+     3,967,181,530      instructions                     #    2.42  insn per cycle         
+       0.560033790 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1579) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.010082e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.324116e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.324116e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.018941e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.312758e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.312758e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293562e-05 )  GeV^0
-TOTAL       :     0.436315 sec
+TOTAL       :     0.436114 sec
 INFO: No Floating Point Exceptions have been reported
-     1,251,368,298      cycles                           #    2.837 GHz                    
-     2,493,850,083      instructions                     #    1.99  insn per cycle         
-       0.441849585 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1910) (512y:    0) (512z:    0)
+     1,253,193,980      cycles                           #    2.841 GHz                    
+     2,497,513,333      instructions                     #    1.99  insn per cycle         
+       0.441707702 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1924) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.127180e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.591382e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.591382e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.105058e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.563425e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.563425e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293562e-05 )  GeV^0
-TOTAL       :     0.424520 sec
+TOTAL       :     0.426869 sec
 INFO: No Floating Point Exceptions have been reported
-     1,217,161,488      cycles                           #    2.835 GHz                    
-     2,468,132,372      instructions                     #    2.03  insn per cycle         
-       0.430034667 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1855) (512y:    1) (512z:    0)
+     1,223,516,570      cycles                           #    2.834 GHz                    
+     2,473,072,662      instructions                     #    2.02  insn per cycle         
+       0.432489185 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1870) (512y:    1) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.904252e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.911507e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.911507e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.875374e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.829234e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.829234e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293561e-05 )  GeV^0
-TOTAL       :     0.451099 sec
+TOTAL       :     0.455920 sec
 INFO: No Floating Point Exceptions have been reported
-     1,074,595,880      cycles                           #    2.357 GHz                    
-     2,071,376,740      instructions                     #    1.93  insn per cycle         
-       0.456603914 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1039) (512y:    5) (512z: 1290)
+     1,079,442,551      cycles                           #    2.341 GHz                    
+     2,072,975,829      instructions                     #    1.92  insn per cycle         
+       0.461745309 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1011) (512y:    5) (512z: 1292)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt
index d10e1afaa5..12dbe0a7bb 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 53s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-30_00:08:25
+DATE: 2024-09-18_13:33:46
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.263978e+08                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.009055e+09                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.720229e+09                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.057555e+08                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.155700e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.563343e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486732e-01 +- 3.293572e-05 )  GeV^0
-TOTAL       :     0.484875 sec
+TOTAL       :     0.485884 sec
 INFO: No Floating Point Exceptions have been reported
-     2,018,138,819      cycles                           #    2.844 GHz                    
-     2,860,124,556      instructions                     #    1.42  insn per cycle         
-       0.768540832 seconds time elapsed
+     2,043,628,192      cycles                           #    2.869 GHz                    
+     2,916,801,925      instructions                     #    1.43  insn per cycle         
+       0.771023658 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 93
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 86
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.165432e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.053698e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.053698e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.191416e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.056446e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.056446e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293563e-05 )  GeV^0
-TOTAL       :     1.231345 sec
+TOTAL       :     1.229162 sec
 INFO: No Floating Point Exceptions have been reported
-     3,613,467,441      cycles                           #    2.924 GHz                    
-     9,465,330,613      instructions                     #    2.62  insn per cycle         
-       1.236653003 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  366) (avx2:    0) (512y:    0) (512z:    0)
+     3,623,698,938      cycles                           #    2.936 GHz                    
+     9,471,242,034      instructions                     #    2.61  insn per cycle         
+       1.234707648 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  367) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.209580e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.334762e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.334762e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.264406e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.455240e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.455240e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293563e-05 )  GeV^0
-TOTAL       :     0.562802 sec
+TOTAL       :     0.551418 sec
 INFO: No Floating Point Exceptions have been reported
-     1,626,298,538      cycles                           #    2.865 GHz                    
-     3,929,468,943      instructions                     #    2.42  insn per cycle         
-       0.568304538 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1516) (avx2:    0) (512y:    0) (512z:    0)
+     1,633,608,321      cycles                           #    2.938 GHz                    
+     3,933,410,721      instructions                     #    2.41  insn per cycle         
+       0.556738925 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1517) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.014044e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.299344e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.299344e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.014495e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.293948e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.293948e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293562e-05 )  GeV^0
-TOTAL       :     0.435235 sec
+TOTAL       :     0.435710 sec
 INFO: No Floating Point Exceptions have been reported
-     1,243,896,160      cycles                           #    2.827 GHz                    
-     2,478,597,676      instructions                     #    1.99  insn per cycle         
-       0.440693366 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1801) (512y:    0) (512z:    0)
+     1,251,845,572      cycles                           #    2.841 GHz                    
+     2,481,653,408      instructions                     #    1.98  insn per cycle         
+       0.441241697 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1817) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.125580e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.589773e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.589773e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.125464e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.603160e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.603160e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293562e-05 )  GeV^0
-TOTAL       :     0.422774 sec
+TOTAL       :     0.424252 sec
 INFO: No Floating Point Exceptions have been reported
-     1,213,663,106      cycles                           #    2.839 GHz                    
-     2,455,056,340      instructions                     #    2.02  insn per cycle         
-       0.428143641 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1764) (512y:    1) (512z:    0)
+     1,222,912,229      cycles                           #    2.849 GHz                    
+     2,456,305,937      instructions                     #    2.01  insn per cycle         
+       0.429917564 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1773) (512y:    1) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.932782e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.989263e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.989263e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.934438e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.010088e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.010088e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486735e-01 +- 3.293561e-05 )  GeV^0
-TOTAL       :     0.445733 sec
+TOTAL       :     0.446349 sec
 INFO: No Floating Point Exceptions have been reported
-     1,066,104,747      cycles                           #    2.367 GHz                    
-     2,055,437,063      instructions                     #    1.93  insn per cycle         
-       0.451267064 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  934) (512y:    5) (512z: 1271)
+     1,072,792,214      cycles                           #    2.378 GHz                    
+     2,057,138,403      instructions                     #    1.92  insn per cycle         
+       0.451920157 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  906) (512y:    5) (512z: 1273)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
index 4acbefe345..8f7e2917bf 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 02s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-30_00:08:36
+DATE: 2024-09-18_13:33:57
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.660194e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.763645e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.371516e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.090014e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.319571e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.751667e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.518809 sec
+TOTAL       :     0.522308 sec
 INFO: No Floating Point Exceptions have been reported
-     2,144,176,882      cycles                           #    2.847 GHz                    
-     3,054,167,922      instructions                     #    1.42  insn per cycle         
-       0.809452032 seconds time elapsed
+     2,179,170,623      cycles                           #    2.882 GHz                    
+     3,109,984,327      instructions                     #    1.43  insn per cycle         
+       0.814646692 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 132
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 130
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.940769e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.019109e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.019109e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.952529e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.022459e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.022459e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     1.286203 sec
+TOTAL       :     1.284601 sec
 INFO: No Floating Point Exceptions have been reported
-     3,776,980,405      cycles                           #    2.925 GHz                    
-     9,745,669,027      instructions                     #    2.58  insn per cycle         
-       1.292070140 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  338) (avx2:    0) (512y:    0) (512z:    0)
+     3,782,838,045      cycles                           #    2.933 GHz                    
+     9,753,328,321      instructions                     #    2.58  insn per cycle         
+       1.290389924 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  341) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.493346e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.916716e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.916716e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.563360e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.027715e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.027715e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.815959 sec
+TOTAL       :     0.783750 sec
 INFO: No Floating Point Exceptions have been reported
-     2,288,270,177      cycles                           #    2.787 GHz                    
-     5,912,884,833      instructions                     #    2.58  insn per cycle         
-       0.821822014 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1409) (avx2:    0) (512y:    0) (512z:    0)
+     2,313,452,686      cycles                           #    2.933 GHz                    
+     5,920,736,181      instructions                     #    2.56  insn per cycle         
+       0.789531453 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1412) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.286080e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.371020e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.371020e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.274544e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.372577e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.372577e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.572108 sec
+TOTAL       :     0.574756 sec
 INFO: No Floating Point Exceptions have been reported
-     1,616,702,363      cycles                           #    2.801 GHz                    
-     3,250,741,760      instructions                     #    2.01  insn per cycle         
-       0.577859574 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1555) (512y:    0) (512z:    0)
+     1,639,105,587      cycles                           #    2.827 GHz                    
+     3,253,580,218      instructions                     #    1.98  insn per cycle         
+       0.580508158 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1567) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.334033e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.467430e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.467430e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.338032e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.481810e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.481810e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.562193 sec
+TOTAL       :     0.561846 sec
 INFO: No Floating Point Exceptions have been reported
-     1,592,350,337      cycles                           #    2.807 GHz                    
-     3,206,636,539      instructions                     #    2.01  insn per cycle         
-       0.567886333 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1434) (512y:  101) (512z:    0)
+     1,602,124,528      cycles                           #    2.826 GHz                    
+     3,209,983,521      instructions                     #    2.00  insn per cycle         
+       0.567621873 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1446) (512y:  101) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.160554e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.100255e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.100255e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.198566e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.176156e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.176156e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.600536 sec
+TOTAL       :     0.590094 sec
 INFO: No Floating Point Exceptions have been reported
-     1,346,255,520      cycles                           #    2.223 GHz                    
-     2,374,224,628      instructions                     #    1.76  insn per cycle         
-       0.606410866 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  744) (512y:   64) (512z: 1062)
+     1,347,708,343      cycles                           #    2.265 GHz                    
+     2,376,834,038      instructions                     #    1.76  insn per cycle         
+       0.595752442 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  768) (512y:   64) (512z: 1063)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt
index 1a9f8ec0a0..856901d743 100644
--- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 54s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x'
 
-DATE: 2024-08-30_00:08:49
+DATE: 2024-09-18_13:34:09
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 6.788888e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.340980e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.793695e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.212511e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.510212e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.023434e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.515813 sec
+TOTAL       :     0.523683 sec
 INFO: No Floating Point Exceptions have been reported
-     2,143,423,318      cycles                           #    2.860 GHz                    
-     3,067,491,467      instructions                     #    1.43  insn per cycle         
-       0.806493056 seconds time elapsed
+     2,161,000,888      cycles                           #    2.849 GHz                    
+     3,093,780,518      instructions                     #    1.43  insn per cycle         
+       0.816657446 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.981536e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.025136e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.025136e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 9.006386e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.027076e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.027076e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     1.279928 sec
+TOTAL       :     1.275858 sec
 INFO: No Floating Point Exceptions have been reported
-     3,757,035,204      cycles                           #    2.924 GHz                    
-     9,636,078,564      instructions                     #    2.56  insn per cycle         
-       1.285735539 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  356) (avx2:    0) (512y:    0) (512z:    0)
+     3,759,691,883      cycles                           #    2.936 GHz                    
+     9,643,680,583      instructions                     #    2.57  insn per cycle         
+       1.281474685 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  359) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 1.550168e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.006120e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.006120e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.517196e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.947819e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.947819e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.788402 sec
+TOTAL       :     0.804148 sec
 INFO: No Floating Point Exceptions have been reported
-     2,290,168,715      cycles                           #    2.887 GHz                    
-     5,855,355,373      instructions                     #    2.56  insn per cycle         
-       0.794115436 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 1367) (avx2:    0) (512y:    0) (512z:    0)
+     2,322,905,849      cycles                           #    2.871 GHz                    
+     5,850,527,655      instructions                     #    2.52  insn per cycle         
+       0.809789330 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 1371) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.244805e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.317031e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.317031e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.254780e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.333242e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.333242e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.580569 sec
+TOTAL       :     0.577840 sec
 INFO: No Floating Point Exceptions have been reported
-     1,633,570,710      cycles                           #    2.789 GHz                    
-     3,214,560,636      instructions                     #    1.97  insn per cycle         
-       0.586312446 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1471) (512y:    0) (512z:    0)
+     1,650,198,876      cycles                           #    2.831 GHz                    
+     3,216,570,367      instructions                     #    1.95  insn per cycle         
+       0.583563842 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1483) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.358491e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.525177e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.525177e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.314025e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.454653e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.454653e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.556422 sec
+TOTAL       :     0.565918 sec
 INFO: No Floating Point Exceptions have been reported
-     1,578,013,364      cycles                           #    2.811 GHz                    
-     3,178,344,168      instructions                     #    2.01  insn per cycle         
-       0.561965085 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1370) (512y:  101) (512z:    0)
+     1,600,538,363      cycles                           #    2.803 GHz                    
+     3,181,550,003      instructions                     #    1.99  insn per cycle         
+       0.571587963 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1382) (512y:  101) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 2.190170e+06                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.161682e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.161682e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 2.185908e+06                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.142994e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.142994e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 1.486736e-01 +- 3.293564e-05 )  GeV^0
-TOTAL       :     0.591028 sec
+TOTAL       :     0.592175 sec
 INFO: No Floating Point Exceptions have been reported
-     1,349,381,848      cycles                           #    2.264 GHz                    
-     2,358,384,697      instructions                     #    1.75  insn per cycle         
-       0.596719851 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  692) (512y:   64) (512z: 1053)
+     1,356,716,498      cycles                           #    2.272 GHz                    
+     2,361,264,569      instructions                     #    1.74  insn per cycle         
+       0.597815792 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2:  716) (512y:   64) (512z: 1056)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
index e62b93b708..99516e3f65 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 17s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-30_00:05:27
+DATE: 2024-09-18_13:30:47
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.645125e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.174225e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.284950e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.206537e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.286021e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.966943e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     0.530944 sec
+TOTAL       :     0.539704 sec
 INFO: No Floating Point Exceptions have been reported
-     2,186,537,838      cycles                           #    2.869 GHz                    
-     3,159,166,393      instructions                     #    1.44  insn per cycle         
-       0.821865584 seconds time elapsed
+     2,208,534,555      cycles                           #    2.845 GHz                    
+     3,150,536,398      instructions                     #    1.43  insn per cycle         
+       0.835623159 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.819724e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.866410e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.866410e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.822612e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.869779e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.869779e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     5.870423 sec
+TOTAL       :     5.861835 sec
 INFO: No Floating Point Exceptions have been reported
-    17,214,056,104      cycles                           #    2.930 GHz                    
-    45,926,568,401      instructions                     #    2.67  insn per cycle         
-       5.876101310 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  623) (avx2:    0) (512y:    0) (512z:    0)
+    17,248,615,219      cycles                           #    2.940 GHz                    
+    45,920,744,006      instructions                     #    2.66  insn per cycle         
+       5.867505238 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  622) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.190034e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.347526e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.347526e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.157644e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.314617e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.314617e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.395213 sec
+TOTAL       :     3.430104 sec
 INFO: No Floating Point Exceptions have been reported
-     9,982,315,760      cycles                           #    2.936 GHz                    
-    27,799,777,118      instructions                     #    2.78  insn per cycle         
-       3.401040734 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2:    0) (512y:    0) (512z:    0)
+    10,035,725,674      cycles                           #    2.922 GHz                    
+    27,802,903,324      instructions                     #    2.77  insn per cycle         
+       3.435933108 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2537) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.999649e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.380612e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.380612e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.941289e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.318652e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.318652e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.205225 sec
+TOTAL       :     2.231098 sec
 INFO: No Floating Point Exceptions have been reported
-     6,075,288,503      cycles                           #    2.749 GHz                    
-    12,582,306,777      instructions                     #    2.07  insn per cycle         
-       2.211180421 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2613) (512y:    0) (512z:    0)
+     6,101,804,369      cycles                           #    2.729 GHz                    
+    12,586,990,350      instructions                     #    2.06  insn per cycle         
+       2.237005738 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2620) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.454536e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.904963e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.904963e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.519324e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.987161e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.987161e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.030596 sec
+TOTAL       :     2.007245 sec
 INFO: No Floating Point Exceptions have been reported
-     5,581,988,175      cycles                           #    2.742 GHz                    
-    11,997,380,759      instructions                     #    2.15  insn per cycle         
-       2.036484083 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2356) (512y:  144) (512z:    0)
+     5,563,695,868      cycles                           #    2.765 GHz                    
+    12,000,166,171      instructions                     #    2.16  insn per cycle         
+       2.013040788 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2365) (512y:  144) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.473092e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.649502e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.649502e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.502694e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.684435e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.684435e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.126818 sec
+TOTAL       :     3.101349 sec
 INFO: No Floating Point Exceptions have been reported
-     5,723,367,750      cycles                           #    1.828 GHz                    
-     8,340,914,234      instructions                     #    1.46  insn per cycle         
-       3.132596047 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1456) (512y:  122) (512z: 1805)
+     5,749,698,258      cycles                           #    1.851 GHz                    
+     8,343,640,860      instructions                     #    1.45  insn per cycle         
+       3.107135736 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1468) (512y:  122) (512z: 1806)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt
index dd7dc97e3b..1f4bfaf624 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 55s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-30_00:05:52
+DATE: 2024-09-18_13:31:12
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.652880e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.169554e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.279516e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.340722e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.356922e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.992900e+07                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     0.531091 sec
+TOTAL       :     0.532286 sec
 INFO: No Floating Point Exceptions have been reported
-     2,192,996,113      cycles                           #    2.837 GHz                    
-     3,138,073,320      instructions                     #    1.43  insn per cycle         
-       0.831810121 seconds time elapsed
+     2,205,060,845      cycles                           #    2.868 GHz                    
+     3,167,717,935      instructions                     #    1.44  insn per cycle         
+       0.825884785 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 208
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 212
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.864790e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.913608e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.913608e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.873402e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.922894e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.922894e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     5.729763 sec
+TOTAL       :     5.705020 sec
 INFO: No Floating Point Exceptions have been reported
-    16,766,275,374      cycles                           #    2.924 GHz                    
-    44,912,110,298      instructions                     #    2.68  insn per cycle         
-       5.735460834 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  567) (avx2:    0) (512y:    0) (512z:    0)
+    16,751,892,515      cycles                           #    2.934 GHz                    
+    44,906,929,991      instructions                     #    2.68  insn per cycle         
+       5.710885629 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  566) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.345782e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.518358e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.518358e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.361567e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.536177e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.536177e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.241480 sec
+TOTAL       :     3.227440 sec
 INFO: No Floating Point Exceptions have been reported
-     9,507,410,834      cycles                           #    2.929 GHz                    
-    26,685,640,419      instructions                     #    2.81  insn per cycle         
-       3.247226358 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2330) (avx2:    0) (512y:    0) (512z:    0)
+     9,512,762,540      cycles                           #    2.943 GHz                    
+    26,678,539,109      instructions                     #    2.80  insn per cycle         
+       3.233163450 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2326) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.592169e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.910029e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.910029e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.604596e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.927835e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.927835e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.390784 sec
+TOTAL       :     2.385363 sec
 INFO: No Floating Point Exceptions have been reported
-     6,586,629,911      cycles                           #    2.750 GHz                    
-    14,105,772,712      instructions                     #    2.14  insn per cycle         
-       2.396587613 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2697) (512y:    0) (512z:    0)
+     6,599,025,301      cycles                           #    2.760 GHz                    
+    14,108,971,201      instructions                     #    2.14  insn per cycle         
+       2.391489598 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2705) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.780110e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.122504e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.122504e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.791684e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.138771e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.138771e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.299915 sec
+TOTAL       :     2.294784 sec
 INFO: No Floating Point Exceptions have been reported
-     6,327,227,681      cycles                           #    2.745 GHz                    
-    13,699,353,623      instructions                     #    2.17  insn per cycle         
-       2.305670472 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2348) (512y:  297) (512z:    0)
+     6,350,789,081      cycles                           #    2.762 GHz                    
+    13,712,967,214      instructions                     #    2.16  insn per cycle         
+       2.300513281 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2356) (512y:  298) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = DOUBLE (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.358446e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.524734e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.524734e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.371675e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.540530e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.540530e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.229645 sec
+TOTAL       :     3.217988 sec
 INFO: No Floating Point Exceptions have been reported
-     5,923,380,282      cycles                           #    1.832 GHz                    
-    10,098,206,916      instructions                     #    1.70  insn per cycle         
-       3.235349944 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1306) (512y:  208) (512z: 1985)
+     5,939,821,646      cycles                           #    1.843 GHz                    
+    10,101,817,070      instructions                     #    1.70  insn per cycle         
+       3.223668588 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1318) (512y:  208) (512z: 1986)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
index 2ac6c8341c..1a672b74ce 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 03s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-30_00:06:17
+DATE: 2024-09-18_13:31:37
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 8.712341e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.185127e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.398560e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.264093e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.766977e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.882650e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072877e+00 +- 3.361153e-03 )  GeV^0
-TOTAL       :     0.486465 sec
+TOTAL       :     0.489615 sec
 INFO: No Floating Point Exceptions have been reported
-     2,023,703,635      cycles                           #    2.842 GHz                    
-     2,905,407,935      instructions                     #    1.44  insn per cycle         
-       0.768933327 seconds time elapsed
+     2,060,695,462      cycles                           #    2.874 GHz                    
+     2,961,708,283      instructions                     #    1.44  insn per cycle         
+       0.774445109 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 149
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 125
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.920991e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.974908e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.974908e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.937524e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.992418e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.992418e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072937e+00 +- 3.361545e-03 )  GeV^0
-TOTAL       :     5.544585 sec
+TOTAL       :     5.498876 sec
 INFO: No Floating Point Exceptions have been reported
-    16,210,015,502      cycles                           #    2.921 GHz                    
-    45,320,815,835      instructions                     #    2.80  insn per cycle         
-       5.550177665 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  601) (avx2:    0) (512y:    0) (512z:    0)
+    16,211,815,789      cycles                           #    2.946 GHz                    
+    45,319,917,505      instructions                     #    2.80  insn per cycle         
+       5.504546294 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  600) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.506457e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.846418e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.846418e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.533229e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.869354e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.869354e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072937e+00 +- 3.361544e-03 )  GeV^0
-TOTAL       :     2.413252 sec
+TOTAL       :     2.401545 sec
 INFO: No Floating Point Exceptions have been reported
-     7,077,065,356      cycles                           #    2.927 GHz                    
-    17,770,661,239      instructions                     #    2.51  insn per cycle         
-       2.418827542 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 3136) (avx2:    0) (512y:    0) (512z:    0)
+     7,056,760,375      cycles                           #    2.932 GHz                    
+    17,791,878,594      instructions                     #    2.52  insn per cycle         
+       2.407391534 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 3147) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.282819e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.390404e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.390404e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.087610e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.152694e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.152694e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     1.351583 sec
+TOTAL       :     1.383748 sec
 INFO: No Floating Point Exceptions have been reported
-     3,739,036,824      cycles                           #    2.757 GHz                    
-     8,260,771,297      instructions                     #    2.21  insn per cycle         
-       1.357115595 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3355) (512y:    0) (512z:    0)
+     3,839,977,803      cycles                           #    2.765 GHz                    
+     8,262,037,377      instructions                     #    2.15  insn per cycle         
+       1.389311013 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3371) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 8.783112e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.003814e+06                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.003814e+06                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.847495e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.011837e+06                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.011837e+06                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     1.280052 sec
+TOTAL       :     1.271772 sec
 INFO: No Floating Point Exceptions have been reported
-     3,542,795,885      cycles                           #    2.758 GHz                    
-     7,916,136,647      instructions                     #    2.23  insn per cycle         
-       1.285449312 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3201) (512y:   20) (512z:    0)
+     3,548,498,858      cycles                           #    2.779 GHz                    
+     7,914,474,526      instructions                     #    2.23  insn per cycle         
+       1.277559305 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3214) (512y:   20) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.415170e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.052151e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.052151e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.536546e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.195032e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.195032e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     1.721159 sec
+TOTAL       :     1.690671 sec
 INFO: No Floating Point Exceptions have been reported
-     3,264,706,701      cycles                           #    1.893 GHz                    
-     6,097,584,670      instructions                     #    1.87  insn per cycle         
-       1.726769804 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2294) (512y:   24) (512z: 2154)
+     3,256,995,213      cycles                           #    1.921 GHz                    
+     6,100,882,884      instructions                     #    1.87  insn per cycle         
+       1.696260075 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2258) (512y:   22) (512z: 2156)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt
index bb153103dc..d3b2f0408f 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 54s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-30_00:06:38
+DATE: 2024-09-18_13:31:58
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 9.119307e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.455409e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.728882e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 8.208288e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.783701e+08                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.898530e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072877e+00 +- 3.361153e-03 )  GeV^0
-TOTAL       :     0.486421 sec
+TOTAL       :     0.487345 sec
 INFO: No Floating Point Exceptions have been reported
-     2,033,006,308      cycles                           #    2.862 GHz                    
-     2,873,303,358      instructions                     #    1.41  insn per cycle         
-       0.769599438 seconds time elapsed
+     2,044,938,895      cycles                           #    2.858 GHz                    
+     2,894,501,323      instructions                     #    1.42  insn per cycle         
+       0.773252899 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 128
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 124
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.956818e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.013845e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.013845e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.963294e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.019360e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.019360e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072937e+00 +- 3.361545e-03 )  GeV^0
-TOTAL       :     5.444034 sec
+TOTAL       :     5.426510 sec
 INFO: No Floating Point Exceptions have been reported
-    15,986,130,227      cycles                           #    2.934 GHz                    
-    44,427,962,929      instructions                     #    2.78  insn per cycle         
-       5.449527371 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  534) (avx2:    0) (512y:    0) (512z:    0)
+    15,955,926,327      cycles                           #    2.938 GHz                    
+    44,427,771,107      instructions                     #    2.78  insn per cycle         
+       5.431874949 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  533) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.304706e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.775665e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.775665e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.335493e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.807156e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.807156e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072937e+00 +- 3.361544e-03 )  GeV^0
-TOTAL       :     2.062387 sec
+TOTAL       :     2.051564 sec
 INFO: No Floating Point Exceptions have been reported
-     6,060,207,395      cycles                           #    2.932 GHz                    
-    17,068,760,146      instructions                     #    2.82  insn per cycle         
-       2.067941987 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2:    0) (512y:    0) (512z:    0)
+     6,058,187,563      cycles                           #    2.946 GHz                    
+    17,074,725,200      instructions                     #    2.82  insn per cycle         
+       2.057140058 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2862) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.034117e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.607363e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.607363e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.066914e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.644109e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.644109e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     1.822223 sec
+TOTAL       :     1.813330 sec
 INFO: No Floating Point Exceptions have been reported
-     5,017,549,044      cycles                           #    2.747 GHz                    
-    10,219,780,372      instructions                     #    2.04  insn per cycle         
-       1.827614223 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3892) (512y:    0) (512z:    0)
+     5,026,891,048      cycles                           #    2.765 GHz                    
+    10,223,175,449      instructions                     #    2.03  insn per cycle         
+       1.818918027 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3906) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 6.106149e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.683031e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.683031e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 6.155601e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.742490e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.742490e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     1.802302 sec
+TOTAL       :     1.789620 sec
 INFO: No Floating Point Exceptions have been reported
-     4,959,754,152      cycles                           #    2.745 GHz                    
-     9,989,877,535      instructions                     #    2.01  insn per cycle         
-       1.807732961 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3793) (512y:    2) (512z:    0)
+     4,970,225,584      cycles                           #    2.770 GHz                    
+     9,994,978,881      instructions                     #    2.01  insn per cycle         
+       1.795236203 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 3805) (512y:    2) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = FLOAT (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.617211e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.936164e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.936164e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.666448e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.992729e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.992729e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072967e+00 +- 3.361967e-03 )  GeV^0
-TOTAL       :     2.356646 sec
+TOTAL       :     2.333237 sec
 INFO: No Floating Point Exceptions have been reported
-     4,364,327,832      cycles                           #    1.848 GHz                    
-     8,442,316,116      instructions                     #    1.93  insn per cycle         
-       2.362315380 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2782) (512y:    4) (512z: 2752)
+     4,367,486,322      cycles                           #    1.868 GHz                    
+     8,444,271,998      instructions                     #    1.93  insn per cycle         
+       2.338821094 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2744) (512y:    4) (512z: 2754)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
index b209de599f..c1f4bb8132 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 01m 03s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-30_00:07:00
+DATE: 2024-09-18_13:32:20
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.640674e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.169556e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.279935e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.373966e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.408476e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.005223e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     0.528492 sec
+TOTAL       :     0.533114 sec
 INFO: No Floating Point Exceptions have been reported
-     2,185,199,416      cycles                           #    2.838 GHz                    
-     3,106,274,692      instructions                     #    1.42  insn per cycle         
-       0.827757615 seconds time elapsed
+     2,212,396,057      cycles                           #    2.876 GHz                    
+     3,189,695,931      instructions                     #    1.44  insn per cycle         
+       0.826417249 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.793535e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.838632e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.838632e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.812942e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.859362e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.859362e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     5.953704 sec
+TOTAL       :     5.891473 sec
 INFO: No Floating Point Exceptions have been reported
-    17,390,993,785      cycles                           #    2.919 GHz                    
-    46,078,480,657      instructions                     #    2.65  insn per cycle         
-       5.959481036 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  623) (avx2:    0) (512y:    0) (512z:    0)
+    17,373,992,128      cycles                           #    2.947 GHz                    
+    46,072,043,013      instructions                     #    2.65  insn per cycle         
+       5.897196721 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  622) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.191739e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.349907e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.349907e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.226094e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.386425e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.386425e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.393170 sec
+TOTAL       :     3.358561 sec
 INFO: No Floating Point Exceptions have been reported
-     9,945,468,646      cycles                           #    2.927 GHz                    
-    27,597,530,111      instructions                     #    2.77  insn per cycle         
-       3.398902714 seconds time elapsed
+     9,911,091,884      cycles                           #    2.947 GHz                    
+    27,587,758,232      instructions                     #    2.78  insn per cycle         
+       3.364358964 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.990071e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.366649e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.366649e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.044961e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.439076e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.439076e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.209354 sec
+TOTAL       :     2.186755 sec
 INFO: No Floating Point Exceptions have been reported
-     6,023,134,211      cycles                           #    2.720 GHz                    
-    12,486,085,370      instructions                     #    2.07  insn per cycle         
-       2.215108609 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2765) (512y:    0) (512z:    0)
+     6,022,763,481      cycles                           #    2.748 GHz                    
+    12,488,130,017      instructions                     #    2.07  insn per cycle         
+       2.192467039 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2776) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 5.529154e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.998828e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.998828e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 5.596506e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.079685e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.079685e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.004558 sec
+TOTAL       :     1.980331 sec
 INFO: No Floating Point Exceptions have been reported
-     5,485,809,258      cycles                           #    2.730 GHz                    
-    11,922,343,140      instructions                     #    2.17  insn per cycle         
-       2.010441607 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2510) (512y:  146) (512z:    0)
+     5,504,974,873      cycles                           #    2.773 GHz                    
+    11,923,154,801      instructions                     #    2.17  insn per cycle         
+       1.986372291 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2521) (512y:  146) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.563890e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.750749e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.750749e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.610025e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.802161e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.802161e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.050811 sec
+TOTAL       :     3.013333 sec
 INFO: No Floating Point Exceptions have been reported
-     5,611,379,647      cycles                           #    1.836 GHz                    
-     8,110,650,078      instructions                     #    1.45  insn per cycle         
-       3.056719977 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1646) (512y:  126) (512z: 1865)
+     5,617,715,088      cycles                           #    1.861 GHz                    
+     8,110,898,143      instructions                     #    1.44  insn per cycle         
+       3.019371634 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1671) (512y:  126) (512z: 1865)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt
index 9758c0e4fb..744bfec9d4 100644
--- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt
@@ -1,8 +1,4 @@
 
-------------------------------------------------
-Preliminary build completed in 0d 00h 00m 54s
-------------------------------------------------
-
 Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx
 BACKEND=cpp512y (was cppauto)
 OMPFLAGS=
@@ -44,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp
 make[1]: Nothing to be done for 'all'.
 make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 
-DATE: 2024-08-30_00:07:25
+DATE: 2024-09-18_13:32:45
 
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
@@ -53,22 +49,25 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts:
 Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary            = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME]     (23) = ( 4.640042e+07                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.163678e+08                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.273008e+08                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.356227e+07                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.388949e+07                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.002637e+08                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     0.528347 sec
+TOTAL       :     0.530171 sec
 INFO: No Floating Point Exceptions have been reported
-     2,195,921,164      cycles                           #    2.863 GHz                    
-     3,159,163,941      instructions                     #    1.44  insn per cycle         
-       0.824586714 seconds time elapsed
+     2,205,062,942      cycles                           #    2.875 GHz                    
+     3,154,626,469      instructions                     #    1.43  insn per cycle         
+       0.823696592 seconds time elapsed
 runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 208
+==PROF== Profiling "sigmaKin": launch__registers_per_thread 212
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
@@ -86,20 +85,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME]     (23) = ( 1.848710e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.896952e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.896952e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 1.861428e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.909561e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.909561e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     5.778145 sec
+TOTAL       :     5.739728 sec
 INFO: No Floating Point Exceptions have been reported
-    16,939,093,132      cycles                           #    2.929 GHz                    
-    45,097,100,323      instructions                     #    2.66  insn per cycle         
-       5.783860430 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:  568) (avx2:    0) (512y:    0) (512z:    0)
+    16,938,834,117      cycles                           #    2.949 GHz                    
+    45,091,140,717      instructions                     #    2.66  insn per cycle         
+       5.745446347 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:  567) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -115,20 +117,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.316686e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.486642e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.486642e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.325491e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.496074e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.496074e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.267618 sec
+TOTAL       :     3.260792 sec
 INFO: No Floating Point Exceptions have been reported
-     9,524,753,937      cycles                           #    2.911 GHz                    
-    26,244,236,470      instructions                     #    2.76  insn per cycle         
-       3.273278507 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4: 2385) (avx2:    0) (512y:    0) (512z:    0)
+     9,505,160,256      cycles                           #    2.910 GHz                    
+    26,249,919,899      instructions                     #    2.76  insn per cycle         
+       3.266614954 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2:    0) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -144,20 +149,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.499453e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.802367e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.802367e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.459875e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.763541e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.763541e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.437149 sec
+TOTAL       :     2.458814 sec
 INFO: No Floating Point Exceptions have been reported
-     6,708,501,181      cycles                           #    2.747 GHz                    
-    14,027,891,193      instructions                     #    2.09  insn per cycle         
-       2.442844457 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2883) (512y:    0) (512z:    0)
+     6,750,977,111      cycles                           #    2.740 GHz                    
+    14,029,286,718      instructions                     #    2.08  insn per cycle         
+       2.464538527 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2895) (512y:    0) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -173,20 +181,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 4.730475e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.065255e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.065255e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 4.781257e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.129375e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.129375e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     2.323336 sec
+TOTAL       :     2.300139 sec
 INFO: No Floating Point Exceptions have been reported
-     6,393,451,931      cycles                           #    2.746 GHz                    
-    13,511,993,077      instructions                     #    2.11  insn per cycle         
-       2.329209524 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2519) (512y:  302) (512z:    0)
+     6,382,631,497      cycles                           #    2.769 GHz                    
+    13,515,067,929      instructions                     #    2.12  insn per cycle         
+       2.305941749 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 2531) (512y:  302) (512z:    0)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
@@ -202,20 +213,23 @@ Process                     = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe
 Workflow summary            = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision                = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv    = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME]     (23) = ( 3.589660e+05                 )  sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.781904e+05                 )  sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.781904e+05                 )  sec^-1
+EvtsPerSec[Rmb+ME]     (23) = ( 3.602901e+05                 )  sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.797238e+05                 )  sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.797238e+05                 )  sec^-1
 MeanMatrixElemValue         = ( 2.072848e+00 +- 3.360985e-03 )  GeV^0
-TOTAL       :     3.028874 sec
+TOTAL       :     3.017121 sec
 INFO: No Floating Point Exceptions have been reported
-     5,571,310,646      cycles                           #    1.837 GHz                    
-     9,204,211,713      instructions                     #    1.65  insn per cycle         
-       3.034661225 seconds time elapsed
-=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1431) (512y:  212) (512z: 2058)
+     5,589,518,345      cycles                           #    1.850 GHz                    
+     9,206,594,679      instructions                     #    1.65  insn per cycle         
+       3.022936699 seconds time elapsed
+=Symbols in CPPProcess_cpp.o= (~sse4:    0) (avx2: 1456) (512y:  212) (512z: 2059)
 -------------------------------------------------------------------------
 runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
-[  PASSED  ] 3 tests.
+[  PASSED  ] 4 tests.
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
+INFO: No Floating Point Exceptions have been reported
+DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2

From 716ebaff6f87a3fea5320e1ec2dee1b609c7a50e Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 20 Sep 2024 11:34:27 +0200
Subject: [PATCH 41/50] [helas] move to upstream/master gg_tt.mad codegen log
 for easier merging

---
 .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt       | 63 ++++++++-----------
 1 file changed, 25 insertions(+), 38 deletions(-)

diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
index c87deb69b9..6ff78a3661 100644
--- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
@@ -14,7 +14,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.5.3_lo_vect         2023-12-23         *
+*         VERSION 3.6.0_lo_vect         2024-06-17         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -45,11 +45,6 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu
 Note that you can still compile and run aMC@NLO with the built-in PDFs
  MG5_aMC> set lhapdf /PATH/TO/lhapdf-config
 
-None does not seem to correspond to a valid lhapdf-config executable. 
-Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config).
-Note that you can still compile and run aMC@NLO with the built-in PDFs
- MG5_aMC> set lhapdf /PATH/TO/lhapdf-config
-
 Using default text editor "vi". Set another one in ./input/mg5_configuration.txt
 Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt
 Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt
@@ -62,7 +57,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005797863006591797 [0m
+[1;32mDEBUG: model prefixing  takes 0.0057220458984375 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,20 +150,20 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.009 s
+1 processes with 3 diagrams generated in 0.008 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. 
+[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  cformat = [0m standalone_simd [1;30m[export_cpp.py at line 3070][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 162][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_tt 
 INFO: remove old information in CODEGEN_mad_gg_tt 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 167][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards [0m
@@ -177,43 +172,33 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-[1;32mDEBUG:  proc_id = [0m 1 [1;30m[export_cpp.py at line 710][0m [0m
-[1;32mDEBUG:  config_map = [0m [1, 2, 3] [1;30m[export_cpp.py at line 711][0m [0m
-[1;32mDEBUG:  subproc_number = [0m 0 [1;30m[export_cpp.py at line 712][0m [0m
-[1;32mDEBUG:  Done [1;30m[export_cpp.py at line 713][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m False True 32 [1;30m[export_v4.py at line 1877][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
-[1;32mDEBUG:  vector, subproc_group,self.opt['vector_size'] = [0m 32 True 32 [1;30m[export_v4.py at line 1877][0m [0m
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  os.getcwd() = [0m /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx [1;30m[export_v4.py at line 6444][0m [0m
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1601][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1625][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1626][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1523][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1547][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1548][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.110 s
+Wrote files for 10 helas calls in 0.074 s
+[1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.150 s
+ALOHA: aloha creates 2 routines in  0.149 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.138 s
+ALOHA: aloha creates 4 routines in  0.137 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h
@@ -228,22 +213,24 @@ INFO: Use c++ compiler g++
 INFO: Generate jpeg diagrams 
 INFO: Generate web pages 
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
-patching file Source/genps.inc
 patching file SubProcesses/makefile
 DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
-patching file auto_dsig1.f
 patching file driver.f
+Hunk #1 succeeded at 76 (offset 2 lines).
+Hunk #2 succeeded at 280 (offset 8 lines).
+Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 255][0m [0m
+Hunk #2 succeeded at 227 (offset 13 lines).
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.143s
-user	0m1.859s
-sys	0m0.281s
+real	0m2.049s
+user	0m1.643s
+sys	0m0.271s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
@@ -257,7 +244,7 @@ Code generation completed in 2 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.5.3_lo_vect                            *
+*         VERSION 3.6.0_lo_vect                            *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -287,7 +274,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.5.3_lo_vect                            *
+*         VERSION 3.6.0_lo_vect                            *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *

From e6561e9a45af6b35d210691392ee9d622473f490 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Sat, 21 Sep 2024 10:33:41 +0200
Subject: [PATCH 42/50] [helas] regenerate all processes after merging master
 and amd

---
 .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt   |  28 +--
 .../CODEGEN_cudacpp_ee_mumu_log.txt           |  13 +-
 .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt       |  26 ++-
 .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt    |  14 +-
 .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt |  39 ++--
 .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt     |  30 +--
 .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt  |  16 +-
 .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt   |  31 +--
 .../CODEGEN_cudacpp_gg_ttgg_log.txt           |  15 +-
 .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt |  31 +--
 .../CODEGEN_cudacpp_gg_ttggg_log.txt          |  19 +-
 .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt     |  39 ++--
 .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt  |  19 +-
 .../CODEGEN_mad_heft_gg_bb_log.txt            |  27 ++-
 .../CODEGEN_cudacpp_heft_gg_bb_log.txt        |  16 +-
 .../CODEGEN_mad_nobm_pp_ttW_log.txt           |  89 ++++----
 .../SubProcesses/MemoryAccessGs.h             |   2 +-
 .../SubProcesses/MemoryAccessMomenta.h        |   2 +-
 .../SubProcesses/P0_dux_ttxwm/CPPProcess.cc   |  14 +-
 .../SubProcesses/P0_dux_ttxwm/check_sa.cc     |   2 +
 .../SubProcesses/P0_udx_ttxwp/CPPProcess.cc   |  14 +-
 .../SubProcesses/P0_udx_ttxwp/check_sa.cc     |   2 +
 .../SubProcesses/P1_dux_ttxwmg/CPPProcess.cc  |  56 ++---
 .../SubProcesses/P1_dux_ttxwmg/check_sa.cc    |   2 +
 .../SubProcesses/P1_gd_ttxwmu/CPPProcess.cc   |  56 ++---
 .../SubProcesses/P1_gd_ttxwmu/check_sa.cc     |   2 +
 .../SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc |  56 ++---
 .../SubProcesses/P1_gdx_ttxwpux/check_sa.cc   |   2 +
 .../SubProcesses/P1_gu_ttxwpd/CPPProcess.cc   |  56 ++---
 .../SubProcesses/P1_gu_ttxwpd/check_sa.cc     |   2 +
 .../SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc |  56 ++---
 .../SubProcesses/P1_gux_ttxwmdx/check_sa.cc   |   2 +
 .../SubProcesses/P1_udx_ttxwpg/CPPProcess.cc  |  56 ++---
 .../SubProcesses/P1_udx_ttxwpg/check_sa.cc    |   2 +
 .../nobm_pp_ttW.mad/SubProcesses/cudacpp.mk   |  21 +-
 .../src/HelAmps_sm_no_b_mass.h                | 206 ++++++++++++++++++
 .../nobm_pp_ttW.mad/src/cudacpp_config.mk     |   2 +-
 .../nobm_pp_ttW.mad/src/mgOnGpuConfig.h       |  16 +-
 .../CODEGEN_mad_pp_tt012j_log.txt             | 171 ++++++++-------
 .../CODEGEN_mad_smeft_gg_tttt_log.txt         |  31 +--
 .../SubProcesses/P1_gg_ttxttx/matrix1.pdf     | Bin 375126 -> 375126 bytes
 .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt     |  19 +-
 .../CODEGEN_mad_susy_gg_t1t1_log.txt          |  27 ++-
 .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt      |  15 +-
 .../CODEGEN_mad_susy_gg_tt_log.txt            |  27 ++-
 .../CODEGEN_cudacpp_susy_gg_tt_log.txt        |  18 +-
 46 files changed, 869 insertions(+), 520 deletions(-)

diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
index e9017c49df..9231cca725 100644
--- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
@@ -57,7 +57,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005692958831787109 [0m
+[1;32mDEBUG: model prefixing  takes 0.0054187774658203125 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -149,7 +149,7 @@ INFO: Checking for minimal orders which gives processes.
 INFO: Please specify coupling orders to bypass this step. 
 INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1  
 INFO: Process has 2 diagrams 
-1 processes with 2 diagrams generated in 0.005 s
+1 processes with 2 diagrams generated in 0.004 s
 Total: 1 processes with 2 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -171,30 +171,30 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Processing color information for process: e+ e- > mu+ mu- @1 
 INFO: Creating files in directory P1_epem_mupmum 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group epem_mupmum 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1629][0m [0m
 Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
-Wrote files for 8 helas calls in 0.072 s
+Wrote files for 8 helas calls in 0.069 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
-ALOHA: aloha creates 3 routines in  0.205 s
+ALOHA: aloha creates 3 routines in  0.202 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 7 routines in  0.260 s
+ALOHA: aloha creates 7 routines in  0.256 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -205,6 +205,8 @@ ALOHA: aloha creates 7 routines in  0.260 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h
@@ -234,10 +236,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.845s
-user	0m1.829s
-sys	0m0.251s
-Code generation completed in 4 seconds
+real	0m2.045s
+user	0m1.764s
+sys	0m0.267s
+Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
index 608753e001..66b00dcdfc 100644
--- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 0.9350659847259521)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +58,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005699634552001953 [0m
+[1;32mDEBUG: model prefixing  takes 0.005448341369628906 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -177,7 +178,7 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 4 routines in  0.276 s
+ALOHA: aloha creates 4 routines in  0.268 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -188,6 +189,8 @@ ALOHA: aloha creates 4 routines in  0.276 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h
@@ -196,7 +199,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
 quit
 
-real	0m0.775s
-user	0m0.619s
-sys	0m0.043s
+real	0m1.485s
+user	0m1.339s
+sys	0m0.076s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
index 6ff78a3661..76275688f6 100644
--- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
@@ -57,7 +57,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0057220458984375 [0m
+[1;32mDEBUG: model prefixing  takes 0.006117820739746094 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.008 s
+1 processes with 3 diagrams generated in 0.009 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -172,33 +172,35 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.074 s
+Wrote files for 10 helas calls in 0.072 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.149 s
+ALOHA: aloha creates 2 routines in  0.151 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.137 s
+ALOHA: aloha creates 4 routines in  0.138 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h
@@ -228,9 +230,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.049s
-user	0m1.643s
-sys	0m0.271s
+real	0m2.087s
+user	0m1.660s
+sys	0m0.268s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
index 16f7e1adfd..3ebd41d53b 100644
--- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
@@ -57,7 +57,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005597114562988281 [0m
+[1;32mDEBUG: model prefixing  takes 0.005710601806640625 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.008 s
+1 processes with 3 diagrams generated in 0.009 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -176,13 +176,15 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.146 s
+ALOHA: aloha creates 2 routines in  0.142 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h
@@ -191,7 +193,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
 quit
 
-real	0m0.897s
-user	0m0.476s
-sys	0m0.056s
+real	0m0.529s
+user	0m0.473s
+sys	0m0.054s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
index 10026f0131..ec542bea1e 100644
--- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
+++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.125483751296997)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +58,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005770444869995117 [0m
+[1;32mDEBUG: model prefixing  takes 0.005392551422119141 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,7 +151,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.008 s
+1 processes with 3 diagrams generated in 0.009 s
 Total: 1 processes with 3 diagrams
 add process g g > t t~ g
 INFO: Checking for minimal orders which gives processes. 
@@ -182,29 +183,29 @@ INFO: Processing color information for process: g g > t t~ g @2
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P2_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1548][0m [0m
-Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s
-Wrote files for 46 helas calls in 0.194 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
+Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s
+Wrote files for 46 helas calls in 0.187 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -212,14 +213,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.332 s
+ALOHA: aloha creates 5 routines in  0.324 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.319 s
+ALOHA: aloha creates 10 routines in  0.308 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -231,6 +232,8 @@ ALOHA: aloha creates 10 routines in  0.319 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h
@@ -267,10 +270,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.887s
-user	0m2.344s
-sys	0m0.296s
-Code generation completed in 3 seconds
+real	0m5.585s
+user	0m2.269s
+sys	0m0.318s
+Code generation completed in 6 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
index 76bd44c193..0ff2bec0cd 100644
--- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
@@ -57,7 +57,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005703926086425781 [0m
+[1;32mDEBUG: model prefixing  takes 0.00566864013671875 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.022 s
+1 processes with 16 diagrams generated in 0.021 s
 Total: 1 processes with 16 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -172,18 +172,18 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Processing color information for process: g g > t t~ g @1 
 INFO: Creating files in directory P1_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1548][0m [0m
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s
-Wrote files for 36 helas calls in 0.123 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1629][0m [0m
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s
+Wrote files for 36 helas calls in 0.120 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -191,14 +191,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.332 s
+ALOHA: aloha creates 5 routines in  0.328 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.319 s
+ALOHA: aloha creates 10 routines in  0.313 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -210,6 +210,8 @@ ALOHA: aloha creates 10 routines in  0.319 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h
@@ -239,10 +241,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.320s
-user	0m2.195s
-sys	0m0.274s
-Code generation completed in 4 seconds
+real	0m2.429s
+user	0m2.166s
+sys	0m0.265s
+Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
index 7fff8e07b7..a47c73a7ff 100644
--- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
@@ -57,7 +57,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005605220794677734 [0m
+[1;32mDEBUG: model prefixing  takes 0.0056798458099365234 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -172,14 +172,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.334 s
+ALOHA: aloha creates 5 routines in  0.326 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -191,6 +191,8 @@ ALOHA: aloha creates 5 routines in  0.334 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h
@@ -199,7 +201,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
 quit
 
-real	0m0.887s
-user	0m0.734s
-sys	0m0.052s
-Code generation completed in 1 seconds
+real	0m0.775s
+user	0m0.728s
+sys	0m0.045s
+Code generation completed in 0 seconds
diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
index b0e5cdabdf..d91926119e 100644
--- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.1265456676483154)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +58,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.00579071044921875 [0m
+[1;32mDEBUG: model prefixing  takes 0.005780458450317383 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,7 +151,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.164 s
+1 processes with 123 diagrams generated in 0.160 s
 Total: 1 processes with 123 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -172,18 +173,18 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ g g @1 
 INFO: Creating files in directory P1_gg_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1548][0m [0m
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.434 s
-Wrote files for 222 helas calls in 0.680 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1629][0m [0m
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.421 s
+Wrote files for 222 helas calls in 0.669 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -191,14 +192,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.339 s
+ALOHA: aloha creates 5 routines in  0.333 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.320 s
+ALOHA: aloha creates 10 routines in  0.328 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -213,6 +214,8 @@ ALOHA: aloha creates 10 routines in  0.320 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h
@@ -242,10 +245,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.940s
-user	0m3.563s
-sys	0m0.292s
-Code generation completed in 4 seconds
+real	0m6.857s
+user	0m3.548s
+sys	0m0.300s
+Code generation completed in 7 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
index 9ec88f76e4..5a0e2d7582 100644
--- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.1273341178894043)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +58,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005756378173828125 [0m
+[1;32mDEBUG: model prefixing  takes 0.005579710006713867 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -172,14 +173,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.442 s
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.434 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.340 s
+ALOHA: aloha creates 5 routines in  0.330 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -194,6 +195,8 @@ ALOHA: aloha creates 5 routines in  0.340 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h
@@ -202,7 +205,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
 quit
 
-real	0m1.576s
-user	0m1.428s
+real	0m4.478s
+user	0m1.411s
 sys	0m0.060s
-Code generation completed in 2 seconds
+Code generation completed in 4 seconds
diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
index f3d8babced..3725e24847 100644
--- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.1302731037139893)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +58,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0056743621826171875 [0m
+[1;32mDEBUG: model prefixing  takes 0.005527973175048828 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,7 +151,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.949 s
+1 processes with 1240 diagrams generated in 1.894 s
 Total: 1 processes with 1240 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -174,18 +175,18 @@ INFO: Processing color information for process: g g > t t~ g g g @1
 INFO: Creating files in directory P1_gg_ttxggg 
 INFO: Computing Color-Flow optimization [15120 term] 
 INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1548][0m [0m
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.727 s
-Wrote files for 2281 helas calls in 18.905 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1629][0m [0m
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.532 s
+Wrote files for 2281 helas calls in 18.323 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -193,14 +194,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.331 s
+ALOHA: aloha creates 5 routines in  0.322 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.320 s
+ALOHA: aloha creates 10 routines in  0.314 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -215,6 +216,8 @@ ALOHA: aloha creates 10 routines in  0.320 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h
@@ -244,10 +247,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m33.441s
-user	0m32.899s
-sys	0m0.437s
-Code generation completed in 34 seconds
+real	0m35.534s
+user	0m31.967s
+sys	0m0.462s
+Code generation completed in 36 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
index d61dcd4ef1..49c1de1923 100644
--- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.1222457885742188)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +58,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005732297897338867 [0m
+[1;32mDEBUG: model prefixing  takes 0.005368709564208984 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,7 +151,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.936 s
+1 processes with 1240 diagrams generated in 1.869 s
 Total: 1 processes with 1240 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg
 Load PLUGIN.CUDACPP_OUTPUT
@@ -172,14 +173,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.740 s
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.510 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.358 s
+ALOHA: aloha creates 5 routines in  0.353 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -194,6 +195,8 @@ ALOHA: aloha creates 5 routines in  0.358 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h
@@ -202,7 +205,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
 quit
 
-real	0m13.472s
-user	0m13.208s
-sys	0m0.101s
-Code generation completed in 14 seconds
+real	0m15.955s
+user	0m12.788s
+sys	0m0.111s
+Code generation completed in 16 seconds
diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
index 2a9ce74dda..9710cfb084 100644
--- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.124884605407715)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -56,7 +57,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005671024322509766 [0m
+[1;32mDEBUG: model prefixing  takes 0.005639791488647461 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -165,7 +166,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.079 s
+8 processes with 40 diagrams generated in 0.078 s
 Total: 8 processes with 40 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -195,38 +196,38 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~
 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Creating files in directory P1_gu_ttxu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxu 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_gux_ttxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1548][0m [0m
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s
-Wrote files for 32 helas calls in 0.166 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
+Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s
+Wrote files for 32 helas calls in 0.163 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.151 s
+ALOHA: aloha creates 2 routines in  0.145 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.137 s
+ALOHA: aloha creates 4 routines in  0.132 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -234,6 +235,8 @@ ALOHA: aloha creates 4 routines in  0.137 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h
@@ -272,10 +275,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.831s
-user	0m1.943s
-sys	0m0.282s
-Code generation completed in 3 seconds
+real	0m5.184s
+user	0m1.878s
+sys	0m0.306s
+Code generation completed in 6 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
index 527d0c838e..ac0ee82295 100644
--- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.1240127086639404)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -56,7 +57,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005598783493041992 [0m
+[1;32mDEBUG: model prefixing  takes 0.0057010650634765625 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -165,7 +166,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.080 s
+8 processes with 40 diagrams generated in 0.078 s
 Total: 8 processes with 40 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq
 Load PLUGIN.CUDACPP_OUTPUT
@@ -204,11 +205,11 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s
+Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.147 s
+ALOHA: aloha creates 2 routines in  0.145 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -216,6 +217,8 @@ ALOHA: aloha creates 2 routines in  0.147 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h
@@ -224,7 +227,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
 quit
 
-real	0m0.747s
-user	0m0.588s
-sys	0m0.059s
-Code generation completed in 1 seconds
+real	0m3.646s
+user	0m0.580s
+sys	0m0.062s
+Code generation completed in 3 seconds
diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
index 7aaebdcb5e..bf6f44f959 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.1262729167938232)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -144,31 +145,31 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Creating files in directory P1_gg_bbx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_bbx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1548][0m [0m
-Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s
-Wrote files for 12 helas calls in 0.078 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1629][0m [0m
+Generated helas calls for 1 subprocesses (4 diagrams) in 0.008 s
+Wrote files for 12 helas calls in 0.075 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.271 s
+ALOHA: aloha creates 4 routines in  0.263 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 8 routines in  0.254 s
+ALOHA: aloha creates 8 routines in  0.246 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -177,6 +178,8 @@ ALOHA: aloha creates 8 routines in  0.254 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFS2
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h
 INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h
@@ -206,10 +209,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.179s
-user	0m1.907s
-sys	0m0.272s
-Code generation completed in 2 seconds
+real	0m5.115s
+user	0m1.836s
+sys	0m0.279s
+Code generation completed in 6 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
index eeec277df5..e72c4d139b 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.1248199939727783)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,6 +58,11 @@ set auto_convert_model T
 save options auto_convert_model
 save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt
 import model heft
+INFO: load particles 
+INFO: load vertices 
+[1;34mWARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model. [0m
+[1;34mWARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model. [0m
+[1;32mDEBUG: model prefixing  takes 0.006158590316772461 [0m
 INFO: Restrict model heft with file models/heft/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: s u w+ at order: QED=1 [0m
@@ -159,6 +165,8 @@ ALOHA: aloha creates 4 routines in  0.269 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFS2
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h
 INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h
@@ -167,7 +175,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
 quit
 
-real	0m0.664s
-user	0m0.584s
-sys	0m0.059s
-Code generation completed in 1 seconds
+real	0m3.668s
+user	0m0.614s
+sys	0m0.050s
+Code generation completed in 4 seconds
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt
index 64acb57091..ad9954e12d 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.1298129558563232)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -56,7 +57,7 @@ set zerowidth_tchannel F
 import model sm-no_b_mass
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005632638931274414 [0m
+[1;32mDEBUG: model prefixing  takes 0.005838632583618164 [0m
 INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -180,7 +181,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w-
 INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- 
 INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ 
 INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ 
-4 processes with 8 diagrams generated in 0.110 s
+4 processes with 8 diagrams generated in 0.113 s
 Total: 4 processes with 8 diagrams
 add process p p > t t~ w j @1
 INFO: Checking for minimal orders which gives processes. 
@@ -222,7 +223,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~
 INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g 
 INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ 
 INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g 
-12 processes with 144 diagrams generated in 0.657 s
+12 processes with 144 diagrams generated in 0.671 s
 Total: 16 processes with 152 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -266,106 +267,106 @@ INFO: Generating Helas calls for process: d u~ > t t~ w- WEIGHTED<=4
 INFO: Reusing existing color information for process: d u~ > t t~ w- 
 INFO: Combined process s c~ > t t~ w- WEIGHTED<=4 with process d u~ > t t~ w- WEIGHTED<=4 
 INFO: Creating files in directory P1_gu_ttxwpd 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u > t t~ w+ d WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxwpd 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_gd_ttxwmu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g d > t t~ w- u WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gd_ttxwmu 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_gux_ttxwmdx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ w- d~ WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxwmdx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_gdx_ttxwpux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g d~ > t t~ w+ u~ WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gdx_ttxwpux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_udx_ttxwpg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u d~ > t t~ w+ g WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group udx_ttxwpg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_dux_ttxwmg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: d u~ > t t~ w- g WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group dux_ttxwmg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P0_udx_ttxwp 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u d~ > t t~ w+ WEIGHTED<=4 
 INFO: Finding symmetric diagrams for subprocess group udx_ttxwp 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P0_dux_ttxwm 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: d u~ > t t~ w- WEIGHTED<=4 
 INFO: Finding symmetric diagrams for subprocess group dux_ttxwm 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1548][0m [0m
-Generated helas calls for 8 subprocesses (76 diagrams) in 0.208 s
-Wrote files for 212 helas calls in 0.885 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1629][0m [0m
+Generated helas calls for 8 subprocesses (76 diagrams) in 0.211 s
+Wrote files for 212 helas calls in 0.860 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
-ALOHA: aloha creates 3 routines in  0.205 s
+ALOHA: aloha creates 3 routines in  0.213 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
-ALOHA: aloha creates 6 routines in  0.203 s
+ALOHA: aloha creates 6 routines in  0.210 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -375,6 +376,8 @@ ALOHA: aloha creates 6 routines in  0.203 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./HelAmps_sm_no_b_mass.h
 INFO: Created file HelAmps_sm_no_b_mass.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.h
@@ -461,10 +464,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m5.225s
-user	0m4.183s
-sys	0m0.518s
-Code generation completed in 6 seconds
+real	0m7.811s
+user	0m4.243s
+sys	0m0.554s
+Code generation completed in 8 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MemoryAccessGs.h
index 63c17a68fa..8c82f1a2b9 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MemoryAccessGs.h
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MemoryAccessGs.h
@@ -10,7 +10,7 @@
 
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
-#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
+#include "MemoryBuffers.h" // for HostBufferGs::isaligned
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MemoryAccessMomenta.h
index 1bba0f5e80..140833e7a4 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MemoryAccessMomenta.h
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MemoryAccessMomenta.h
@@ -8,7 +8,7 @@
 
 #include "mgOnGpuConfig.h"
 
-#include "CPPProcess.h"
+#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
 #include "MemoryAccessHelpers.h"
 #include "MemoryAccessVectors.h"
 
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc
index cf966578d2..fd07ae8741 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -333,11 +337,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[1], cHel[ihel][4], +1, w_fp[4], 4 );
 
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[0], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CI_FFV2_2( w_fp[0], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -348,10 +352,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 2 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[5] );
+      helas_CI_FFV2_1( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/check_sa.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/check_sa.cc
index aee105f269..35a9fef55a 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/check_sa.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/check_sa.cc
@@ -971,6 +971,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc
index d2297469b1..d402a62668 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -333,11 +337,11 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, cIPD[1], cHel[ihel][4], +1, w_fp[4], 4 );
 
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[0], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[5] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CI_FFV2_2( w_fp[0], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[5] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -348,10 +352,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 2 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[5] );
+      helas_CI_FFV2_1( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[5] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[5], w_fp[6], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/check_sa.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/check_sa.cc
index aee105f269..35a9fef55a 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/check_sa.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/check_sa.cc
@@ -971,6 +971,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc
index 1936860d35..a378daf0fc 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CI_FFV2_2( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -351,10 +355,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 12 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CI_FFV2_1( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,12 +369,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 12 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[5], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[0], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[5], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
+      helas_CI_FFV2_2( w_fp[0], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -384,7 +388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -395,11 +399,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 12 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[5], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -413,7 +417,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -424,10 +428,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 12 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -438,10 +442,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 12 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
+      helas_CI_FFV2_1( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -452,10 +456,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 12 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[9], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -466,10 +470,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 12 ***
 
       // Wavefunction(s) for diagram number 10
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[5], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -483,7 +487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -494,10 +498,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 12 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/check_sa.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/check_sa.cc
index aee105f269..35a9fef55a 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/check_sa.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/check_sa.cc
@@ -971,6 +971,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc
index 67f4e2f692..a396d9ffe1 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CI_FFV2_2( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -351,10 +355,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 12 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[5], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CI_FFV2_1( w_fp[5], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,12 +369,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 12 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
+      helas_CI_FFV2_2( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -384,7 +388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -395,11 +399,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 12 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -413,7 +417,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -424,10 +428,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 12 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_1( w_fp[5], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -438,10 +442,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 12 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
+      helas_CI_FFV2_1( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -452,10 +456,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 12 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[9], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -466,10 +470,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 12 ***
 
       // Wavefunction(s) for diagram number 10
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -483,7 +487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -494,10 +498,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 12 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[8], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/check_sa.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/check_sa.cc
index aee105f269..35a9fef55a 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/check_sa.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/check_sa.cc
@@ -971,6 +971,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc
index 43af98566f..c04d68bc8f 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CI_FFV2_2( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -351,10 +355,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 12 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CI_FFV2_1( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,12 +369,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 12 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[5], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
+      helas_CI_FFV2_2( w_fp[5], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -384,7 +388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -395,11 +399,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 12 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -413,7 +417,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -424,10 +428,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 12 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -438,10 +442,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 12 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
+      helas_CI_FFV2_1( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -452,10 +456,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 12 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[9], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -466,10 +470,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 12 ***
 
       // Wavefunction(s) for diagram number 10
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -483,7 +487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -494,10 +498,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 12 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[8], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/check_sa.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/check_sa.cc
index aee105f269..35a9fef55a 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/check_sa.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/check_sa.cc
@@ -971,6 +971,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc
index a2e0da387c..e608979ace 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       oxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CI_FFV2_2( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -351,10 +355,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 12 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[5], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CI_FFV2_1( w_fp[5], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,12 +369,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 12 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
+      helas_CI_FFV2_2( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -384,7 +388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -395,11 +399,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 12 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -413,7 +417,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -424,10 +428,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 12 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_1( w_fp[5], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -438,10 +442,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 12 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
+      helas_CI_FFV2_1( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -452,10 +456,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 12 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[9], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -466,10 +470,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 12 ***
 
       // Wavefunction(s) for diagram number 10
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[5], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -483,7 +487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -494,10 +498,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 12 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[8], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[1], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[1], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/check_sa.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/check_sa.cc
index aee105f269..35a9fef55a 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/check_sa.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/check_sa.cc
@@ -971,6 +971,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc
index 00d5cdcf7c..f680dc6436 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       ixxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1_2( w_fp[5], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CI_FFV2_2( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -351,10 +355,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 12 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CI_FFV2_1( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,12 +369,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 12 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[5], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
+      helas_CI_FFV2_2( w_fp[5], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -384,7 +388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -395,11 +399,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 12 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -413,7 +417,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -424,10 +428,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 12 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -438,10 +442,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 12 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
+      helas_CI_FFV2_1( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -452,10 +456,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 12 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[9], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -466,10 +470,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 12 ***
 
       // Wavefunction(s) for diagram number 10
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -483,7 +487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -494,10 +498,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 12 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[8], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[5], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[5], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/check_sa.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/check_sa.cc
index aee105f269..35a9fef55a 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/check_sa.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/check_sa.cc
@@ -971,6 +971,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc
index fbfb3cebcb..5185db1f86 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc
@@ -207,7 +207,9 @@ namespace mg5amcCpu
     using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -219,7 +221,9 @@ namespace mg5amcCpu
     using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
     using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
     using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#ifndef MGONGPU_LINKER_HELAMPS
     using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+#endif
     using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
     using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -335,12 +339,12 @@ namespace mg5amcCpu
 
       vxxxxx<M_ACCESS, W_ACCESS>( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 );
 
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CD_FFV1_2( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[7] );
+      helas_CI_FFV2_2( w_fp[6], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 1
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[8], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -351,10 +355,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 2 OF 12 ***
 
       // Wavefunction(s) for diagram number 2
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
+      helas_CI_FFV2_1( w_fp[1], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[8] );
 
       // Amplitude(s) for diagram number 2
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[6], w_fp[8], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -365,12 +369,12 @@ namespace mg5amcCpu
       // *** DIAGRAM 3 OF 12 ***
 
       // Wavefunction(s) for diagram number 3
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[2], w_fp[5], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
-      FFV2_2<W_ACCESS, CI_ACCESS>( w_fp[0], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[2], w_fp[5], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[6] );
+      helas_CI_FFV2_2( w_fp[0], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[9] );
+      helas_CD_FFV1P0_3( w_fp[3], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 3
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -384,7 +388,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 4
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -395,11 +399,11 @@ namespace mg5amcCpu
       // *** DIAGRAM 5 OF 12 ***
 
       // Wavefunction(s) for diagram number 5
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[3], w_fp[5], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
-      FFV1P0_3<W_ACCESS, CD_ACCESS>( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_2( w_fp[3], w_fp[5], COUPs[0], 1.0, cIPD[0], cIPD[2], w_fp[10] );
+      helas_CD_FFV1P0_3( w_fp[10], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 5
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -413,7 +417,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 6
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[8], w_fp[3], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -424,10 +428,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 7 OF 12 ***
 
       // Wavefunction(s) for diagram number 7
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[3] );
+      helas_CD_FFV1_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[3] );
 
       // Amplitude(s) for diagram number 7
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[3], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -438,10 +442,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 8 OF 12 ***
 
       // Wavefunction(s) for diagram number 8
-      FFV2_1<W_ACCESS, CI_ACCESS>( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
+      helas_CI_FFV2_1( w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 8
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -452,10 +456,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 9 OF 12 ***
 
       // Wavefunction(s) for diagram number 9
-      FFV1_2<W_ACCESS, CD_ACCESS>( w_fp[9], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_2( w_fp[9], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 9
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[10], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -466,10 +470,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 10 OF 12 ***
 
       // Wavefunction(s) for diagram number 10
-      VVV1P0_1<W_ACCESS, CD_ACCESS>( w_fp[5], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
+      helas_CD_VVV1P0_1( w_fp[5], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 10
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[9], w_fp[1], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -483,7 +487,7 @@ namespace mg5amcCpu
       // (none)
 
       // Amplitude(s) for diagram number 11
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[8], w_fp[10], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
@@ -494,10 +498,10 @@ namespace mg5amcCpu
       // *** DIAGRAM 12 OF 12 ***
 
       // Wavefunction(s) for diagram number 12
-      FFV1_1<W_ACCESS, CD_ACCESS>( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
+      helas_CD_FFV1_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] );
 
       // Amplitude(s) for diagram number 12
-      FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( w_fp[0], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
+      helas_CD_FFV1_0( w_fp[0], w_fp[10], w_fp[7], COUPs[0], 1.0, &amp_fp[0] );
 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL
       if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] );
       if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] );
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/check_sa.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/check_sa.cc
index aee105f269..35a9fef55a 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/check_sa.cc
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/check_sa.cc
@@ -971,6 +971,8 @@ main( int argc, char** argv )
               << " [" << process.getCompiler() << "]"
 #ifdef MGONGPU_INLINE_HELAMPS
               << " [inlineHel=1]"
+#elif defined MGONGPU_LINKER_HELAMPS
+              << " [inlineHel=L]"
 #else
               << " [inlineHel=0]"
 #endif
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk
index 20d8ded718..bd9ad881b3 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk
@@ -556,8 +556,11 @@ $(info HELINL='$(HELINL)')
 ifeq ($(HELINL),1)
   CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
   GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
+else ifeq ($(HELINL),L)
+  CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
+  GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
 else ifneq ($(HELINL),0)
-  $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
+  $(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
 endif
 
 # Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
@@ -660,7 +663,6 @@ override RUNTIME =
 #=== Makefile TARGETS and build rules below
 #===============================================================================
 
-
 ifeq ($(GPUCC),)
   cxx_checkmain=$(BUILDDIR)/check_cpp.exe
   cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
@@ -789,6 +791,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
 gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
 endif
 
+# Add object files and special build flags only for the HELINL=L mode
+ifeq ($(HELINL),L)
+cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
+gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
+$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+endif
+
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
@@ -799,12 +809,12 @@ ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
-	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
 #else
-#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+#	$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
 #endif
 endif
 
@@ -975,6 +985,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
 ###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/src/HelAmps_sm_no_b_mass.h b/epochX/cudacpp/nobm_pp_ttW.mad/src/HelAmps_sm_no_b_mass.h
index 237f2d4e6c..16e528af73 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/src/HelAmps_sm_no_b_mass.h
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/src/HelAmps_sm_no_b_mass.h
@@ -1166,8 +1166,214 @@ namespace mg5amcCpu
     return;
   }
 
+  //==========================================================================
+
+#ifndef MGONGPU_LINKER_HELAMPS
+
+#define helas_CD_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_0 FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_1 FFV1_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_1 FFV1_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1_2 FFV1_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1_2 FFV1_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV1P0_3 FFV1P0_3<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV1P0_3 FFV1P0_3<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV2_1 FFV2_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV2_1 FFV2_1<W_ACCESS, CI_ACCESS>
+#define helas_CD_FFV2_2 FFV2_2<W_ACCESS, CD_ACCESS>
+#define helas_CI_FFV2_2 FFV2_2<W_ACCESS, CI_ACCESS>
+#define helas_CD_VVV1P0_1 VVV1P0_1<W_ACCESS, CD_ACCESS>
+#define helas_CI_VVV1P0_1 VVV1P0_1<W_ACCESS, CI_ACCESS>
+
+#else
+
+#define helas_CD_FFV1_0 linker_CD_FFV1_0
+#define helas_CI_FFV1_0 linker_CI_FFV1_0
+#define helas_CD_FFV1_1 linker_CD_FFV1_1
+#define helas_CI_FFV1_1 linker_CI_FFV1_1
+#define helas_CD_FFV1_2 linker_CD_FFV1_2
+#define helas_CI_FFV1_2 linker_CI_FFV1_2
+#define helas_CD_FFV1P0_3 linker_CD_FFV1P0_3
+#define helas_CI_FFV1P0_3 linker_CI_FFV1P0_3
+#define helas_CD_FFV2_1 linker_CD_FFV2_1
+#define helas_CI_FFV2_1 linker_CI_FFV2_1
+#define helas_CD_FFV2_2 linker_CD_FFV2_2
+#define helas_CI_FFV2_2 linker_CI_FFV2_2
+#define helas_CD_VVV1P0_1 linker_CD_VVV1P0_1
+#define helas_CI_VVV1P0_1 linker_CI_VVV1P0_1
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
   //--------------------------------------------------------------------------
 
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] );
+
+  //--------------------------------------------------------------------------
+
+#endif
+
+  //==========================================================================
+
 } // end namespace
 
 #endif // HelAmps_sm_no_b_mass_H
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/src/cudacpp_config.mk b/epochX/cudacpp/nobm_pp_ttW.mad/src/cudacpp_config.mk
index 438fcd1661..8c0c049857 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/src/cudacpp_config.mk
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/src/cudacpp_config.mk
@@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
 
-override SUPPORTED_HELINLS = 0 1
+override SUPPORTED_HELINLS = L 0 1
 ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
   $(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
 endif
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuConfig.h
index 7c6a082392..113fb25b3f 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuConfig.h
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuConfig.h
@@ -29,7 +29,7 @@
 
 // Choose if curand is supported for generating random numbers
 // For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
-// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
+// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
 // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
 #if defined __HIPCC__
 #define MGONGPU_HAS_NO_CURAND 1
@@ -45,7 +45,7 @@
 
 // Choose if hiprand is supported for generating random numbers
 // For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
-// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
+// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
 // (there may exist HIP installations which do not include hiprand?)
 #if defined __CUDACC__ // this must be __CUDACC__ (not MGONGPUCPP_GPUIMPL)
 #define MGONGPU_HAS_NO_HIPRAND 1
@@ -77,9 +77,16 @@
 // Choose whether to inline all HelAmps functions
 // This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
 // By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
 //#undef MGONGPU_INLINE_HELAMPS // default
 ////#define MGONGPU_INLINE_HELAMPS 1
 
+// Choose whether to compile and link all HelAmps functions as separate object files
+// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
+// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
+//#undef MGONGPU_LINKER_HELAMPS // default
+////#define MGONGPU_LINKER_HELAMPS 1
+
 // Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
 // This optimization can gain 20% in CUDA in eemumu (issue #39)
 // By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
@@ -156,6 +163,11 @@
 #endif
 #endif
 
+// SANITY CHECKS (HelAmps)
+#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
+#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
+#endif
+
 // NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
 namespace mgOnGpu
 {
diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
index a93cb1b567..86faba3ce9 100644
--- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
+++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.124648332595825)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -56,7 +57,7 @@ set zerowidth_tchannel F
 define j = p
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005572795867919922 [0m
+[1;32mDEBUG: model prefixing  takes 0.005530357360839844 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -167,7 +168,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~
 INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ 
 INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ 
 INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ 
-5 processes with 7 diagrams generated in 0.030 s
+5 processes with 7 diagrams generated in 0.028 s
 Total: 5 processes with 7 diagrams
 add process p p > t t~ j @1
 INFO: Checking for minimal orders which gives processes. 
@@ -207,7 +208,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~
 INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g 
 INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ 
 INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g 
-13 processes with 76 diagrams generated in 0.142 s
+13 processes with 76 diagrams generated in 0.135 s
 Total: 18 processes with 83 diagrams
 add process p p > t t~ j j @2
 INFO: Checking for minimal orders which gives processes. 
@@ -373,7 +374,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~
 INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ 
 INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ 
 INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.862 s
+65 processes with 1119 diagrams generated in 1.822 s
 Total: 83 processes with 1202 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -494,205 +495,205 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED
 INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 
 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 
 INFO: Creating files in directory P2_gg_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_gg_ttxuux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_gu_ttxgu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_gux_ttxgux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_uux_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_uu_ttxuu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_uux_ttxuux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_uxux_ttxuxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_uc_ttxuc 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_uux_ttxccx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_ucx_ttxucx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P2_uxcx_ttxuxcx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_gu_ttxu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxu 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_gux_ttxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P1_uux_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P0_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
 INFO: Creating files in directory P0_uux_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1548][0m [0m
-Generated helas calls for 18 subprocesses (372 diagrams) in 1.313 s
-Wrote files for 810 helas calls in 2.875 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1629][0m [0m
+Generated helas calls for 18 subprocesses (372 diagrams) in 1.284 s
+Wrote files for 810 helas calls in 2.748 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -700,14 +701,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.343 s
+ALOHA: aloha creates 5 routines in  0.339 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.325 s
+ALOHA: aloha creates 10 routines in  0.313 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -722,6 +723,8 @@ ALOHA: aloha creates 10 routines in  0.325 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h
@@ -885,10 +888,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m10.733s
-user	0m9.674s
-sys	0m0.936s
-Code generation completed in 11 seconds
+real	0m13.421s
+user	0m9.432s
+sys	0m0.958s
+Code generation completed in 14 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
index 10b6e52273..6053b81b8f 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.127755641937256)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -72,7 +73,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.14131855964660645 [0m
+[1;32mDEBUG: model prefixing  takes 0.13721013069152832 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -87,7 +88,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.793 s
+1 processes with 72 diagrams generated in 3.743 s
 Total: 1 processes with 72 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -109,18 +110,18 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ t t~ @1 
 INFO: Creating files in directory P1_gg_ttxttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1548][0m [0m
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.192 s
-Wrote files for 119 helas calls in 0.397 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1629][0m [0m
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.187 s
+Wrote files for 119 helas calls in 0.385 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
@@ -128,14 +129,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.329 s
+ALOHA: aloha creates 5 routines in  0.317 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 10 routines in  0.342 s
+ALOHA: aloha creates 10 routines in  0.328 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -147,6 +148,8 @@ ALOHA: aloha creates 10 routines in  0.342 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV10
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
 INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -176,10 +179,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m7.328s
-user	0m6.983s
-sys	0m0.308s
-Code generation completed in 8 seconds
+real	0m10.174s
+user	0m6.874s
+sys	0m0.284s
+Code generation completed in 10 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.pdf b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.pdf
index 421aa3be878f13363e59bdf035ffc4d7f7ee59de..39cdc07f21816faade12ca39a135028897eddf68 100644
GIT binary patch
delta 41
tcmcciN$lDuv4$4L7N!>F7M3ln-*zzCOqblrs=#Cip&9L_TkT|(0RUex4t)Rs

delta 41
tcmcciN$lDuv4$4L7N!>F7M3ln-*zx+Oqblrs=%ZPp&2!&TkT|(0RUR$4l4iv

diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
index 1c56450caa..9be022da94 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.124478340148926)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -72,7 +73,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.14032483100891113 [0m
+[1;32mDEBUG: model prefixing  takes 0.13663244247436523 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -87,7 +88,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.762 s
+1 processes with 72 diagrams generated in 3.699 s
 Total: 1 processes with 72 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -109,14 +110,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. 
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.192 s
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.186 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.325 s
+ALOHA: aloha creates 5 routines in  0.317 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -128,6 +129,8 @@ ALOHA: aloha creates 5 routines in  0.325 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV10
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
 INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -136,7 +139,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
 quit
 
-real	0m5.180s
-user	0m5.084s
-sys	0m0.064s
-Code generation completed in 6 seconds
+real	0m8.078s
+user	0m4.991s
+sys	0m0.067s
+Code generation completed in 8 seconds
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
index d85917df10..1e8f1c0d96 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.1275925636291504)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -549,7 +550,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.127 s
+1 processes with 6 diagrams generated in 0.122 s
 Total: 1 processes with 6 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -571,29 +572,29 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t1 t1~ @1 
 INFO: Creating files in directory P1_gg_t1t1x 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1629][0m [0m
 Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s
-Wrote files for 16 helas calls in 0.084 s
+Wrote files for 16 helas calls in 0.083 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.195 s
+ALOHA: aloha creates 3 routines in  0.184 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 6 routines in  0.186 s
+ALOHA: aloha creates 6 routines in  0.180 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
@@ -601,6 +602,8 @@ ALOHA: aloha creates 6 routines in  0.186 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVSS1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h
@@ -630,10 +633,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.199s
-user	0m2.720s
-sys	0m0.313s
-Code generation completed in 3 seconds
+real	0m5.957s
+user	0m2.652s
+sys	0m0.302s
+Code generation completed in 6 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
index a22798887c..19a94c3e16 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.128488302230835)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -549,7 +550,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.126 s
+1 processes with 6 diagrams generated in 0.123 s
 Total: 1 processes with 6 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1
 Load PLUGIN.CUDACPP_OUTPUT
@@ -576,7 +577,7 @@ ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.189 s
+ALOHA: aloha creates 3 routines in  0.185 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
@@ -584,6 +585,8 @@ ALOHA: aloha creates 3 routines in  0.189 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVSS1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h
@@ -592,7 +595,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
 quit
 
-real	0m1.364s
-user	0m1.286s
-sys	0m0.065s
-Code generation completed in 1 seconds
+real	0m4.349s
+user	0m1.276s
+sys	0m0.068s
+Code generation completed in 4 seconds
diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
index c15c42381b..a9ec510ec7 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.125643014907837)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -549,7 +550,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.125 s
+1 processes with 3 diagrams generated in 0.118 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -571,33 +572,35 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1152][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1523][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1547][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1548][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1604][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.077 s
+Wrote files for 10 helas calls in 0.074 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.139 s
+ALOHA: aloha creates 2 routines in  0.137 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.140 s
+ALOHA: aloha creates 4 routines in  0.133 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h
@@ -627,10 +630,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.173s
-user	0m2.612s
-sys	0m0.319s
-Code generation completed in 3 seconds
+real	0m5.828s
+user	0m2.532s
+sys	0m0.295s
+Code generation completed in 6 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
index cc7498186d..82229a9c08 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+('WARNING: loading of madgraph too slow!!!', 3.126711368560791)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -54,6 +55,9 @@ set stdout_level DEBUG
 set output information to level: 10
 set zerowidth_tchannel F
 import model MSSM_SLHA2
+INFO: load particles 
+INFO: load vertices 
+[1;32mDEBUG: model prefixing  takes 0.8969454765319824 [0m
 INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . 
 INFO: Detect SLHA2 format. keeping restricted parameter in the param_card 
 [1;32mDEBUG: Simplifying conditional expressions [0m
@@ -549,7 +553,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.121 s
+1 processes with 3 diagrams generated in 0.110 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -575,13 +579,15 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.139 s
+ALOHA: aloha creates 2 routines in  0.135 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
+FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/../SubProcesses/./HelAmps.cc
+INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h
@@ -590,7 +596,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
 quit
 
-real	0m1.303s
-user	0m1.226s
-sys	0m0.063s
-Code generation completed in 1 seconds
+real	0m5.323s
+user	0m2.250s
+sys	0m0.065s
+Code generation completed in 5 seconds

From 86d2393e84f2ba462b464869c59d1270fa656685 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 20 Sep 2024 16:55:03 +0300
Subject: [PATCH 43/50] [helas] in gg_tt.mad cudacpp.mk, restrict the '-rdc'
 flag (for HELINL=L) to cuda only as it does not apply to hip

The hip compilation of CPPProcess.cc now fails as
ccache /opt/rocm-6.0.3/bin/hipcc  -I. -I../../src   -O2 --offload-arch=gfx90a -target x86_64-linux-gnu -DHIP_PLATFORM=amd -DHIP_FAST_MATH -I/opt/rocm-6.0.3/include/ -std=c++17 -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE -DMGONGPU_LINKER_HELAMPS  -fPIC -c -x hip CPPProcess.cc -o CPPProcess_hip.o
lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0(double const*, double const*, double const*, double const*, double, double*)
---
 epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..bd458e70e1 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -795,9 +795,11 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
 endif
+endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o

From 52751a7d095ac5bff4829b30dfc18ad00ff62560 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 20 Sep 2024 17:02:02 +0300
Subject: [PATCH 44/50] [helas] in gg_tt.mad cudacpp.mk, add -fgpu-rdc to the
 CPPProcess.cc compilation on hip for HELINL=L

The hip link of check_hip.exe now fails with
ccache /opt/rocm-6.0.3/bin/hipcc -o check_hip.exe ./check_sa_hip.o -L../../lib -lmg5amc_common_hip -Xlinker -rpath='$ORIGIN/../../lib'  -L../../lib -lmg5amc_gg_ttx_hip ./CommonRandomNumberKernel_hip.o ./RamboSamplingKernels_hip.o ./CurandRandomNumberKernel_hip.o ./HiprandRandomNumberKernel_hip.o  -L/opt/rocm-6.0.3/lib/ -lhiprand
ld.lld: error: undefined reference due to --no-allow-shlib-undefined: __hip_fatbin
---
 epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
index bd458e70e1..3e8755bf11 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -798,6 +798,8 @@ gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
 ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
 endif
 endif
 

From b2c8bb471e442d1489c2e1a4beee7197a39221e2 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 20 Sep 2024 17:04:50 +0300
Subject: [PATCH 45/50] [helas] in gg_tt.mad cudacpp.mk, add -fgpu-rdc
 --hip-link to the check_hip.exe link on hip for HELINL=L, the build succeeds
 but at runtime it fails

The execution fails with
./check_hip.exe -p 1 8 1
ERROR! assertGpu: 'shared object initialization failed' (303) in CPPProcess.cc:558

In addition, the hip link of fcheck_hip.exe fails with
ftn --cray-bypass-pkgconfig -craype-verbose -ffixed-line-length-132 -o fcheck_hip.exe ./fcheck_sa_fortran.o ./fsampler_hip.o -L../../lib -lmg5amc_common_hip -Xlinker -rpath='$ORIGIN/../../lib'  -lgfortran -L../../lib -lmg5amc_gg_ttx_hip ./CommonRandomNumberKernel_hip.o ./RamboSamplingKernels_hip.o -lstdc++ -L/opt/rocm-6.0.3/lib -lamdhip64
gfortran-13 -march=znver3 -D__CRAY_X86_TRENTO -D__CRAY_AMD_GFX90A -D__CRAYXT_COMPUTE_LINUX_TARGET -D__TARGET_LINUX__ -ffixed-line-length-132 -o fcheck_hip.exe ./fcheck_sa_fortran.o ./fsampler_hip.o -L../../lib -lmg5amc_common_hip -Xlinker -rpath=$ORIGIN/../../lib -lgfortran -L../../lib -lmg5amc_gg_ttx_hip ./CommonRandomNumberKernel_hip.o ./RamboSamplingKernels_hip.o -lstdc++ -L/opt/rocm-6.0.3/lib -lamdhip64 -Wl,-Bdynamic -Wl,--as-needed,-lgfortran,-lquadmath,--no-as-needed -Wl,--as-needed,-lpthread,--no-as-needed -Wl,--disable-new-dtags
/usr/lib64/gcc/x86_64-suse-linux/13/../../../../x86_64-suse-linux/bin/ld: ../../lib/libmg5amc_gg_ttx_hip.so: undefined reference to `__hip_fatbin'
---
 epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 1 +
 1 file changed, 1 insertion(+)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
index 3e8755bf11..40920b4e36 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -800,6 +800,7 @@ $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
 else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
 endif
 endif
 

From e4d9206d696e0584823c6ada80b62d1d1af7e8a4 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 20 Sep 2024 17:12:53 +0300
Subject: [PATCH 46/50] [helas] in gg_tt.mad cudacpp.mk, temporarily go back
 and try to use hipcc instead of gfortran to link fcheck_hip.exe: this links
 but it fails at runtime, will revert

Also add -ggdb for debugging. At runtime this fails with the usual #802.
It is now clear that this is in gpuMemcpyToSymbol (line 558)
And the error is precisely 'shared object initialization failed'

./fcheck_hip.exe 1 32 1
...
WARNING! Instantiate device Bridge (nevt=32, gpublocks=1, gputhreads=32, gpublocks*gputhreads=32)
ERROR! assertGpu: 'shared object initialization failed' (303) in CPPProcess.cc:558
fcheck_hip.exe: ./GpuRuntime.h:26: void assertGpu(hipError_t, const char *, int, bool): Assertion `code == gpuSuccess' failed.

Program received signal SIGABRT: Process abort signal.
Backtrace for this error:
0  0x14f947bff2e2 in ???
1  0x14f947bfe475 in ???
2  0x14f945f33dbf in ???
3  0x14f945f33d2b in ???
4  0x14f945f353e4 in ???
5  0x14f945f2bc69 in ???
6  0x14f945f2bcf1 in ???
7  0x14f947bcef96 in _Z9assertGpu10hipError_tPKcib
        at ./GpuRuntime.h:26
8  0x14f947bcef96 in _ZN9mg5amcGpu10CPPProcessC2Ebb
        at /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc:558
9  0x14f947bd2cf3 in _ZN9mg5amcGpu6BridgeIdEC2Ejjj
        at ./Bridge.h:268
10  0x14f947bd678e in fbridgecreate_
        at /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fbridge.cc:54
11  0x2168fd in ???
12  0x216bfe in ???
13  0x14f945f1e24c in ???
14  0x216249 in _start
        at ../sysdeps/x86_64/start.S:120
15  0xffffffffffffffff in ???
Aborted
---
 epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
index 40920b4e36..9d11386166 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -240,7 +240,7 @@ else ifeq ($(BACKEND),hip)
   GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))
 
   # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)
-  ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY
+  GPUFLAGS += -ggdb # FOR DEBUGGING ONLY
 
   # AMD HIP architecture flags
   GPUARCHFLAGS = --offload-arch=gfx90a
@@ -893,7 +893,8 @@ endif
 $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe)
 ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
-	$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64
+#	$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 # fails to link
+	$(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -fgpu-rdc --hip-link # links but crashes
 else
 	$(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe)
 endif

From 2fda261999388914ea05ca3a7adfb195d803cdc4 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 20 Sep 2024 18:10:13 +0300
Subject: [PATCH 47/50] [helas] in gg_tt.mad cudacpp.mk, go back to using
 gfortran instead of hipcc to link fcheck_hip.exe

Revert "[helas] in gg_tt.mad cudacpp.mk, temporarely go back and try to use hipcc instead of gfortran to link fcheck_hip.exe: this links but it fails at runtime, will revert"
This reverts commit 988419b7ae9e266743e07116170767d415f237d5.

NOTE: I tried to use FC=hipcc and this also compiles the fortran ok!
Probably it internally uses flang from llvm #804

The problem however is that there is no lowercase 'main' in fcheck_sa_fortran.o, only an uppercase 'MAIN_'.

Summary of the status: HELINL=L "rdc" is not supported on our AMD GPUs for now.
---
 epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
index 9d11386166..40920b4e36 100644
--- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -240,7 +240,7 @@ else ifeq ($(BACKEND),hip)
   GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))
 
   # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)
-  GPUFLAGS += -ggdb # FOR DEBUGGING ONLY
+  ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY
 
   # AMD HIP architecture flags
   GPUARCHFLAGS = --offload-arch=gfx90a
@@ -893,8 +893,7 @@ endif
 $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe)
 ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
-#	$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 # fails to link
-	$(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -fgpu-rdc --hip-link # links but crashes
+	$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64
 else
 	$(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe)
 endif

From f021a8985209f0ea9f19015a1c670ed3bc910b32 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Fri, 20 Sep 2024 17:24:40 +0200
Subject: [PATCH 48/50] [helas] backport to CODEGEN the gg_tt.mad changes in
 cudacpp.mk to try and support HELINL=L on AMD GPUs via HIP (still incomplete)

---
 .../madgraph/iolibs/template_files/gpu/cudacpp.mk            | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk
index 238a371086..bb902e0f34 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries

From be5a8da594b9f05ccc7d25c20368db8c393acd69 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Wed, 25 Sep 2024 15:01:51 +0200
Subject: [PATCH 49/50] [helas] regenerate all processes - also add to repo
 some missing files from nobm_pp_ttW.mad (git add nobm_pp_ttW.mad)

---
 .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt   |  22 +-
 .../ee_mumu.mad/SubProcesses/cudacpp.mk       |   5 +
 .../CODEGEN_cudacpp_ee_mumu_log.txt           |  13 +-
 .../ee_mumu.sa/SubProcesses/cudacpp.mk        |   5 +
 .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt       |  22 +-
 .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt    |  16 +-
 .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk  |   5 +
 .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt |  27 +-
 .../gg_tt01g.mad/SubProcesses/cudacpp.mk      |   5 +
 .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt     |  26 +-
 .../gg_ttg.mad/SubProcesses/cudacpp.mk        |   5 +
 .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt  |  18 +-
 .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk |   5 +
 .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt   |  27 +-
 .../gg_ttgg.mad/SubProcesses/cudacpp.mk       |   5 +
 .../CODEGEN_cudacpp_gg_ttgg_log.txt           |  17 +-
 .../gg_ttgg.sa/SubProcesses/cudacpp.mk        |   5 +
 .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt |  27 +-
 .../gg_ttggg.mad/SubProcesses/cudacpp.mk      |   5 +
 .../CODEGEN_cudacpp_gg_ttggg_log.txt          |  19 +-
 .../gg_ttggg.sa/SubProcesses/cudacpp.mk       |   5 +
 .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt     |  27 +-
 .../gq_ttq.mad/SubProcesses/cudacpp.mk        |   5 +
 .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt  |  19 +-
 .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk |   5 +
 .../CODEGEN_mad_heft_gg_bb_log.txt            |  23 +-
 .../heft_gg_bb.mad/SubProcesses/cudacpp.mk    |   5 +
 .../CODEGEN_cudacpp_heft_gg_bb_log.txt        |  15 +-
 .../heft_gg_bb.sa/SubProcesses/cudacpp.mk     |   5 +
 .../CODEGEN_mad_nobm_pp_ttW_log.txt           |  29 +-
 .../nobm_pp_ttW.mad/Source/MODEL/couplings3.f |  20 ++
 .../Source/PDF/lep_densities/isrbetll/eepdf.f | 113 ++++++++
 .../PDF/lep_densities/isrbetll/gridpdfaux.f   | 140 +++++++++
 .../PDF/lep_densities/isrbetmll/eepdf.f       | 112 +++++++
 .../PDF/lep_densities/isrbetmll/gridpdfaux.f  | 140 +++++++++
 .../Source/PDF/lep_densities/isrbetmll/info   |   1 +
 .../nobm_pp_ttW.mad/SubProcesses/HelAmps.cc   | 273 ++++++++++++++++++
 .../SubProcesses/P0_dux_ttxwm/HelAmps.cc      |   1 +
 .../SubProcesses/P0_udx_ttxwp/HelAmps.cc      |   1 +
 .../SubProcesses/P1_dux_ttxwmg/HelAmps.cc     |   1 +
 .../SubProcesses/P1_gd_ttxwmu/HelAmps.cc      |   1 +
 .../SubProcesses/P1_gdx_ttxwpux/HelAmps.cc    |   1 +
 .../SubProcesses/P1_gu_ttxwpd/HelAmps.cc      |   1 +
 .../SubProcesses/P1_gux_ttxwmdx/HelAmps.cc    |   1 +
 .../SubProcesses/P1_udx_ttxwpg/HelAmps.cc     |   1 +
 .../nobm_pp_ttW.mad/SubProcesses/cudacpp.mk   |   5 +
 .../CODEGEN_mad_pp_tt012j_log.txt             |  31 +-
 .../pp_tt012j.mad/SubProcesses/cudacpp.mk     |   5 +
 .../CODEGEN_mad_smeft_gg_tttt_log.txt         |  27 +-
 .../smeft_gg_tttt.mad/SubProcesses/cudacpp.mk |   5 +
 .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt     |  19 +-
 .../smeft_gg_tttt.sa/SubProcesses/cudacpp.mk  |   5 +
 .../CODEGEN_mad_susy_gg_t1t1_log.txt          |  23 +-
 .../susy_gg_t1t1.mad/SubProcesses/cudacpp.mk  |   5 +
 .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt      |  15 +-
 .../susy_gg_t1t1.sa/SubProcesses/cudacpp.mk   |   5 +
 .../CODEGEN_mad_susy_gg_tt_log.txt            |  23 +-
 .../susy_gg_tt.mad/SubProcesses/cudacpp.mk    |   5 +
 .../CODEGEN_cudacpp_susy_gg_tt_log.txt        |  15 +-
 .../susy_gg_tt.sa/SubProcesses/cudacpp.mk     |   5 +
 60 files changed, 1207 insertions(+), 210 deletions(-)
 create mode 100644 epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings3.f
 create mode 100644 epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetll/eepdf.f
 create mode 100644 epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetll/gridpdfaux.f
 create mode 100644 epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/eepdf.f
 create mode 100644 epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/gridpdfaux.f
 create mode 100644 epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/info
 create mode 100644 epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/HelAmps.cc
 create mode 120000 epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/HelAmps.cc
 create mode 120000 epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/HelAmps.cc
 create mode 120000 epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/HelAmps.cc
 create mode 120000 epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/HelAmps.cc
 create mode 120000 epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/HelAmps.cc
 create mode 120000 epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/HelAmps.cc
 create mode 120000 epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/HelAmps.cc
 create mode 120000 epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/HelAmps.cc

diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
index 9231cca725..848d3bff3d 100644
--- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
@@ -57,7 +57,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0054187774658203125 [0m
+[1;32mDEBUG: model prefixing  takes 0.00563359260559082 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -152,12 +152,18 @@ INFO: Process has 2 diagrams
 1 processes with 2 diagrams generated in 0.004 s
 Total: 1 processes with 2 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_ee_mumu 
@@ -182,19 +188,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1629][0m [0m
 Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
-Wrote files for 8 helas calls in 0.069 s
+Wrote files for 8 helas calls in 0.072 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
-ALOHA: aloha creates 3 routines in  0.202 s
+ALOHA: aloha creates 3 routines in  0.206 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 7 routines in  0.256 s
+ALOHA: aloha creates 7 routines in  0.263 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -236,10 +242,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.045s
-user	0m1.764s
-sys	0m0.267s
-Code generation completed in 2 seconds
+real	0m2.125s
+user	0m1.839s
+sys	0m0.259s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
index 66b00dcdfc..fcbb22f8af 100644
--- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 0.9350659847259521)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -58,7 +57,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005448341369628906 [0m
+[1;32mDEBUG: model prefixing  takes 0.006380796432495117 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -153,6 +152,8 @@ INFO: Process has 2 diagrams
 1 processes with 2 diagrams generated in 0.004 s
 Total: 1 processes with 2 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
@@ -178,7 +179,7 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 4 routines in  0.268 s
+ALOHA: aloha creates 4 routines in  0.277 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -199,7 +200,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
 quit
 
-real	0m1.485s
-user	0m1.339s
-sys	0m0.076s
+real	0m0.911s
+user	0m0.709s
+sys	0m0.061s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
index 76275688f6..b3fdcd0b2d 100644
--- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
@@ -57,7 +57,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.006117820739746094 [0m
+[1;32mDEBUG: model prefixing  takes 0.005700588226318359 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,15 +150,21 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.009 s
+1 processes with 3 diagrams generated in 0.008 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_tt 
@@ -183,16 +189,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.072 s
+Wrote files for 10 helas calls in 0.074 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.151 s
+ALOHA: aloha creates 2 routines in  0.150 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.138 s
+ALOHA: aloha creates 4 routines in  0.137 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -230,9 +236,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.087s
-user	0m1.660s
-sys	0m0.268s
+real	0m1.934s
+user	0m1.651s
+sys	0m0.280s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
index 3ebd41d53b..323ed49835 100644
--- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
@@ -57,7 +57,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005710601806640625 [0m
+[1;32mDEBUG: model prefixing  takes 0.005724668502807617 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,9 +150,11 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.009 s
+1 processes with 3 diagrams generated in 0.008 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
@@ -176,7 +178,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.142 s
+ALOHA: aloha creates 2 routines in  0.146 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -193,7 +195,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
 quit
 
-real	0m0.529s
-user	0m0.473s
-sys	0m0.054s
-Code generation completed in 1 seconds
+real	0m0.550s
+user	0m0.490s
+sys	0m0.047s
+Code generation completed in 0 seconds
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
index ec542bea1e..8dc238a8ca 100644
--- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
+++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.125483751296997)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -58,7 +57,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005392551422119141 [0m
+[1;32mDEBUG: model prefixing  takes 0.0057926177978515625 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -151,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.009 s
+1 processes with 3 diagrams generated in 0.008 s
 Total: 1 processes with 3 diagrams
 add process g g > t t~ g
 INFO: Checking for minimal orders which gives processes. 
@@ -162,12 +161,18 @@ INFO: Process has 16 diagrams
 1 processes with 16 diagrams generated in 0.020 s
 Total: 2 processes with 19 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_tt01g 
@@ -204,8 +209,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx
 [1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1604][0m [0m
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s
-Wrote files for 46 helas calls in 0.187 s
+Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s
+Wrote files for 46 helas calls in 0.192 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -213,14 +218,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.324 s
+ALOHA: aloha creates 5 routines in  0.335 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.308 s
+ALOHA: aloha creates 10 routines in  0.320 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -270,10 +275,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m5.585s
-user	0m2.269s
-sys	0m0.318s
-Code generation completed in 6 seconds
+real	0m2.649s
+user	0m2.334s
+sys	0m0.313s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
index 0ff2bec0cd..7b03a55987 100644
--- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
@@ -57,7 +57,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.00566864013671875 [0m
+[1;32mDEBUG: model prefixing  takes 0.005819559097290039 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,15 +150,21 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.021 s
+1 processes with 16 diagrams generated in 0.022 s
 Total: 1 processes with 16 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_ttg 
@@ -182,8 +188,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg
 [1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1604][0m [0m
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s
-Wrote files for 36 helas calls in 0.120 s
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s
+Wrote files for 36 helas calls in 0.125 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -191,14 +197,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.328 s
+ALOHA: aloha creates 5 routines in  0.335 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.313 s
+ALOHA: aloha creates 10 routines in  0.320 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -241,10 +247,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.429s
-user	0m2.166s
-sys	0m0.265s
-Code generation completed in 2 seconds
+real	0m2.650s
+user	0m2.229s
+sys	0m0.268s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
index a47c73a7ff..00557adee8 100644
--- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
@@ -57,7 +57,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0056798458099365234 [0m
+[1;32mDEBUG: model prefixing  takes 0.005669355392456055 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,9 +150,11 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.022 s
+1 processes with 16 diagrams generated in 0.023 s
 Total: 1 processes with 16 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
@@ -172,14 +174,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.326 s
+ALOHA: aloha creates 5 routines in  0.335 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -201,7 +203,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
 quit
 
-real	0m0.775s
-user	0m0.728s
-sys	0m0.045s
-Code generation completed in 0 seconds
+real	0m0.816s
+user	0m0.740s
+sys	0m0.055s
+Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
index d91926119e..485a143814 100644
--- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.1265456676483154)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -58,7 +57,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005780458450317383 [0m
+[1;32mDEBUG: model prefixing  takes 0.005662202835083008 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -151,15 +150,21 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.160 s
+1 processes with 123 diagrams generated in 0.163 s
 Total: 1 processes with 123 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_ttgg 
@@ -183,8 +188,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg
 [1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1604][0m [0m
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.421 s
-Wrote files for 222 helas calls in 0.669 s
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s
+Wrote files for 222 helas calls in 0.687 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -192,14 +197,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.333 s
+ALOHA: aloha creates 5 routines in  0.344 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.328 s
+ALOHA: aloha creates 10 routines in  0.324 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -245,10 +250,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m6.857s
-user	0m3.548s
-sys	0m0.300s
-Code generation completed in 7 seconds
+real	0m3.907s
+user	0m3.612s
+sys	0m0.284s
+Code generation completed in 4 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
index 5a0e2d7582..2f2d518acf 100644
--- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.1273341178894043)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -58,7 +57,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005579710006713867 [0m
+[1;32mDEBUG: model prefixing  takes 0.005728960037231445 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -151,9 +150,11 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.165 s
+1 processes with 123 diagrams generated in 0.164 s
 Total: 1 processes with 123 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
@@ -173,7 +174,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.434 s
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.437 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
@@ -205,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
 quit
 
-real	0m4.478s
-user	0m1.411s
-sys	0m0.060s
-Code generation completed in 4 seconds
+real	0m1.492s
+user	0m1.425s
+sys	0m0.049s
+Code generation completed in 2 seconds
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
index 3725e24847..faa5e0d672 100644
--- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.1302731037139893)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -58,7 +57,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005527973175048828 [0m
+[1;32mDEBUG: model prefixing  takes 0.0056841373443603516 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -151,15 +150,21 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.894 s
+1 processes with 1240 diagrams generated in 1.950 s
 Total: 1 processes with 1240 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_ttggg 
@@ -185,8 +190,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg
 [1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1604][0m [0m
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.532 s
-Wrote files for 2281 helas calls in 18.323 s
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.769 s
+Wrote files for 2281 helas calls in 18.956 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -194,14 +199,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.322 s
+ALOHA: aloha creates 5 routines in  0.375 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.314 s
+ALOHA: aloha creates 10 routines in  0.325 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -247,10 +252,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m35.534s
-user	0m31.967s
-sys	0m0.462s
-Code generation completed in 36 seconds
+real	0m33.613s
+user	0m33.028s
+sys	0m0.467s
+Code generation completed in 34 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
index 49c1de1923..a36d10f3c3 100644
--- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.1222457885742188)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -58,7 +57,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005368709564208984 [0m
+[1;32mDEBUG: model prefixing  takes 0.005714893341064453 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -151,9 +150,11 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.869 s
+1 processes with 1240 diagrams generated in 1.936 s
 Total: 1 processes with 1240 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
@@ -173,14 +174,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.510 s
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.780 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.353 s
+ALOHA: aloha creates 5 routines in  0.360 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -205,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
 quit
 
-real	0m15.955s
-user	0m12.788s
-sys	0m0.111s
-Code generation completed in 16 seconds
+real	0m13.504s
+user	0m13.319s
+sys	0m0.105s
+Code generation completed in 13 seconds
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
index 9710cfb084..4d28461afe 100644
--- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.124884605407715)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +56,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005639791488647461 [0m
+[1;32mDEBUG: model prefixing  takes 0.005707502365112305 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -166,15 +165,21 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.078 s
+8 processes with 40 diagrams generated in 0.081 s
 Total: 8 processes with 40 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gq_ttq 
@@ -217,17 +222,17 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux
 [1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s
-Wrote files for 32 helas calls in 0.163 s
+Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s
+Wrote files for 32 helas calls in 0.169 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.145 s
+ALOHA: aloha creates 2 routines in  0.149 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.132 s
+ALOHA: aloha creates 4 routines in  0.138 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -275,10 +280,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m5.184s
-user	0m1.878s
-sys	0m0.306s
-Code generation completed in 6 seconds
+real	0m2.255s
+user	0m1.967s
+sys	0m0.282s
+Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
index ac0ee82295..c13cc9fd71 100644
--- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.1240127086639404)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +56,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0057010650634765625 [0m
+[1;32mDEBUG: model prefixing  takes 0.005742549896240234 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -166,9 +165,11 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.078 s
+8 processes with 40 diagrams generated in 0.082 s
 Total: 8 processes with 40 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
@@ -205,11 +206,11 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s
+Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.145 s
+ALOHA: aloha creates 2 routines in  0.149 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -227,7 +228,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
 quit
 
-real	0m3.646s
-user	0m0.580s
-sys	0m0.062s
-Code generation completed in 3 seconds
+real	0m0.677s
+user	0m0.609s
+sys	0m0.053s
+Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
index bf6f44f959..2dd3979088 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.1262729167938232)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -126,12 +125,18 @@ INFO: Process has 4 diagrams
 1 processes with 4 diagrams generated in 0.006 s
 Total: 1 processes with 4 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_heft_gg_bb 
@@ -155,21 +160,21 @@ INFO: Finding symmetric diagrams for subprocess group gg_bbx
 [1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1604][0m [0m
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (4 diagrams) in 0.008 s
-Wrote files for 12 helas calls in 0.075 s
+Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s
+Wrote files for 12 helas calls in 0.078 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.263 s
+ALOHA: aloha creates 4 routines in  0.272 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 8 routines in  0.246 s
+ALOHA: aloha creates 8 routines in  0.257 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -209,10 +214,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m5.115s
-user	0m1.836s
-sys	0m0.279s
-Code generation completed in 6 seconds
+real	0m3.127s
+user	0m1.903s
+sys	0m0.292s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
index e72c4d139b..5f4a381d59 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.1248199939727783)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -62,7 +61,7 @@ INFO: load particles
 INFO: load vertices 
 [1;34mWARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model. [0m
 [1;34mWARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model. [0m
-[1;32mDEBUG: model prefixing  takes 0.006158590316772461 [0m
+[1;32mDEBUG: model prefixing  takes 0.006167888641357422 [0m
 INFO: Restrict model heft with file models/heft/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: s u w+ at order: QED=1 [0m
@@ -131,6 +130,8 @@ INFO: Process has 4 diagrams
 1 processes with 4 diagrams generated in 0.006 s
 Total: 1 processes with 4 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_bb
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
@@ -156,7 +157,7 @@ ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.269 s
+ALOHA: aloha creates 4 routines in  0.270 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -175,7 +176,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
 quit
 
-real	0m3.668s
-user	0m0.614s
-sys	0m0.050s
-Code generation completed in 4 seconds
+real	0m0.685s
+user	0m0.620s
+sys	0m0.055s
+Code generation completed in 0 seconds
diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt
index ad9954e12d..5649c9490d 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.1298129558563232)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +56,7 @@ set zerowidth_tchannel F
 import model sm-no_b_mass
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005838632583618164 [0m
+[1;32mDEBUG: model prefixing  takes 0.005677461624145508 [0m
 INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -181,7 +180,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w-
 INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- 
 INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ 
 INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ 
-4 processes with 8 diagrams generated in 0.113 s
+4 processes with 8 diagrams generated in 0.112 s
 Total: 4 processes with 8 diagrams
 add process p p > t t~ w j @1
 INFO: Checking for minimal orders which gives processes. 
@@ -223,15 +222,21 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~
 INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g 
 INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ 
 INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g 
-12 processes with 144 diagrams generated in 0.671 s
+12 processes with 144 diagrams generated in 0.663 s
 Total: 16 processes with 152 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_nobm_pp_ttW 
@@ -354,19 +359,19 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm
 [1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1604][0m [0m
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 8 subprocesses (76 diagrams) in 0.211 s
-Wrote files for 212 helas calls in 0.860 s
+Generated helas calls for 8 subprocesses (76 diagrams) in 0.209 s
+Wrote files for 212 helas calls in 0.849 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
-ALOHA: aloha creates 3 routines in  0.213 s
+ALOHA: aloha creates 3 routines in  0.210 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
-ALOHA: aloha creates 6 routines in  0.210 s
+ALOHA: aloha creates 6 routines in  0.207 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -464,10 +469,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m7.811s
-user	0m4.243s
-sys	0m0.554s
-Code generation completed in 8 seconds
+real	0m4.776s
+user	0m4.184s
+sys	0m0.578s
+Code generation completed in 5 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings3.f
new file mode 100644
index 0000000000..2d4127fa27
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings3.f
@@ -0,0 +1,20 @@
+ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+c      written by the UFO converter
+ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+
+      SUBROUTINE COUP3( VECID)
+c     Fills entry VECID of the couplings GC_10 and GC_11 from G.
+      IMPLICIT NONE
+      INTEGER VECID
+      INCLUDE 'model_functions.inc'
+      INCLUDE '../vector.inc'
+c     G, MDL_COMPLEXI, GC_10 and GC_11 are not declared in this file;
+c     presumably they come from the include files -- confirm there.
+      DOUBLE PRECISION PI, ZERO
+      PARAMETER  (PI=3.141592653589793D0)
+      PARAMETER  (ZERO=0D0)
+      INCLUDE 'input.inc'
+      INCLUDE 'coupl.inc'
+      GC_10(VECID) = -G
+      GC_11(VECID) = MDL_COMPLEXI*G
+      END
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetll/eepdf.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetll/eepdf.f
new file mode 100644
index 0000000000..1a10b7ea93
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetll/eepdf.f
@@ -0,0 +1,113 @@
+c     This function calculates the reduced structure function, with
+c     energy fraction given by "x", at scale "Qsquare".
+c     Arguments:
+c       x        energy fraction
+c       Qsquare  scale; enters through log(Qsquare/me**2) only
+c       n        component index; only n=1 gives a non-zero value
+c       partonid parton id; must equal beamid for a non-zero value
+c       beamid   beam id; 11 is the electron beam, -11 is handled
+c                symmetrically (presumably the positron beam)
+c     Returns isr_tilde_racoon(x,beta) when beamid is +-11, partonid
+c     equals beamid and n is 1; returns 0d0 in every other case.
+      real*8 function eepdf_tilde(x,Qsquare,n,partonid,beamid)
+      implicit none
+      real*8 x,Qsquare
+c     presumably the electron mass in GeV -- TODO confirm units
+      real*8 me
+      data me /0.511d-3/
+      real*8 PI
+      real*8 alphaem
+c     In Gmu scheme
+      data alphaem/0.007562397d0/
+      real*8 beta
+      integer n,partonid,beamid
+      real*8 isr_tilde_racoon
+      real*8 res
+
+      PI=4.D0*DATAN(1.D0)
+
+      beta = alphaem/PI * (dlog(Qsquare/me/me)-1)
+
+c     res must be reset on every call: initialising it with a DATA
+c     statement happens only once, which would let the value of an
+c     earlier call leak into calls that match no branch below.
+      res = 0d0
+
+c     electron beam (11) and its -11 counterpart: only the parton
+c     equal to the beam particle contributes, and only for n=1;
+c     other partons are zero
+      if (beamid .eq. 11 .or. beamid .eq. -11) then
+         if (partonid .eq. beamid .and. n .eq. 1) then
+            res = isr_tilde_racoon(x,beta)
+         endif
+      endif
+
+      eepdf_tilde = res
+      end
+      
+c     Expression based on https://arxiv.org/pdf/hep-ph/0302198.pdf,
+c     eq.(2.44). Note that beta_e in eq.(2.45) is twice our beta,
+c     so eq.(2.44) needs to be corrected by some factors of 2.
+      real*8 function isr_tilde_racoon(x,beta)
+      implicit none
+      real*8 x,beta
+      real*8 res
+      real*8 PI
+      real*8 gE
+      real*8 logx, logomx
+      real*8 dlgam,DDILOG
+      external dlgam,DDILOG
+      PI=4.D0*DATAN(1.D0)
+      gE=0.5772156649d0
+      res=0d0
+      if (x .lt. 0.9999999d0) then
+         logx=dlog(x)
+         logomx=dlog(1d0-x)
+c     x-dependent terms, skipped when x is not below 0.9999999
+c     order alpha (term linear in beta)
+         res=-beta*(1d0+x)/2d0
+c     order alpha^2 (term in beta**2)
+         res=res-(beta**2)/8d0*(
+     c        (1d0+3d0*x*x)/(1d0-x)*logx
+     c        +4d0*(1d0+x)*logomx+5d0+x)
+c     order alpha^3 (term in beta**3)
+         res=res-(beta**3)/128d0*(
+     c        (1d0+x)*(6d0*DDILOG(x)+12d0*(logomx**2)-3d0*PI**2)
+     c        +(1d0/(1d0-x)) * (
+     c        (3d0/2d0)*(1d0+8d0*x+3d0*x**2)*logx
+     c        +6d0*(x+5d0)*(1d0-x)*logomx
+     c        +12d0*(1d0+x**2)*logx*logomx
+     c        -1d0/2d0*(1d0+7d0*x**2)*logx**2
+     c        +1d0/4d0*(39d0-24d0*x-15d0*x**2)))
+c     common factor (1-x)**(1-beta) applied to the terms above
+         res=res*(1d0-x)**(1-beta)
+      endif
+      res=res+exp(beta*(-gE+3d0/4d0))/exp(dlgam(1d0+beta))*beta
+      isr_tilde_racoon=res
+      end
+
+c     dlgam(xx): real logarithm of the gamma function at xx.
+c     Evaluates (x+.5)*(log(x+gt)-1) + log(s0) - cst with x=xx-1 and
+c     s0 = 1 + sum over j=1..6 of cof(j)/(x+j).
+      function dlgam(xx)
+      implicit real * 8 (a-h,o-z)
+      real * 8 cof(6),stp,gt,g,cst
+      data cof,stp/76.18009173d0,-86.50532033d0,24.01409822d0,
+     # -1.231739516d0,.120858003d-2,-.536382d-5,2.50662827465d0/
+      data gt,g/5.5d0,5.0d0/
+c     stp and g are not used below; cst agrees numerically with
+c     g - log(stp), which is presumably how it was derived.
+      data cst/4.081061466d0/
+      x = xx - 1
+      xpgt = x + gt
+      xmp5  = x + .5d0
+      s0 = 1
+      do 1 j=1,6
+        x = x + 1
+        tmp = cof(j)/x
+        s0  = s0 + tmp
+  1     continue
+      r10 = log(s0)
+      dlgam = xmp5*(log(xpgt)-1) + r10 - cst
+      return
+      end
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetll/gridpdfaux.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetll/gridpdfaux.f
new file mode 100644
index 0000000000..c09eae9ea2
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetll/gridpdfaux.f
@@ -0,0 +1,140 @@
+      integer function eepdf_n_components(partonid,beamid)
+c     Number of components of parton "partonid" in beam "beamid":
+c     1 when partonid equals beamid for a supported beam (11, the
+c     electron beam, or -11), 0 in every other case.
+      implicit none
+      integer partonid,beamid
+      integer ncom
+c     Default: no component. It also covers beams other than +-11,
+c     for which ncom used to be left unassigned, so that the function
+c     returned an undefined value.
+      ncom=0
+c     electron beam
+      if (beamid .eq. 11) then
+c     other partons are zero
+        if (partonid .eq. 11) ncom=1
+c     beam with id -11: same treatment with the mirrored parton id
+      else if (beamid .eq. -11) then
+        if (partonid .eq. -11) ncom=1
+      endif
+      eepdf_n_components=ncom
+      end
+
+c     This function returns the power of (1-x) for component n of
+c     parton "partonid" in beam "beamid" at scale Q2: it is 1-beta
+c     for n=1 when partonid equals beamid for a supported beam (11,
+c     the electron beam, or -11) and 0 in every other case.
+      real*8 function eepdf_tilde_power(Q2,n,partonid,beamid)
+      implicit none
+      real*8 me
+      data me /0.511d-3/
+      real*8 PI
+      real*8 alphaem
+c     In Gmu scheme
+      data alphaem/0.007562397d0/
+      real*8 beta,eta,Q2
+      integer n,partonid,beamid
+      real*8 k,b
+
+      PI=4.D0*DATAN(1.D0)
+      beta = alphaem/PI * (dlog(Q2/me/me)-1)
+      eta = alphaem/PI * (dlog(Q2/me/me))
+      b=-2.D0/3.D0
+
+c     Default power. It also covers beams other than +-11, for which
+c     k used to be left unassigned (undefined function result).
+      k=0d0
+c     electron beam
+      if (beamid .eq. 11) then
+c     other partons are zero
+        if (partonid .eq. 11) then
+c     only the first component is non-zero
+          if (n .eq. 1) then
+            k=1d0-beta
+          endif
+        endif
+c     beam with id -11: same treatment with the mirrored parton id
+      else if (beamid .eq. -11) then
+        if (partonid .eq. -11) then
+          if (n .eq. 1) then
+            k=1d0-beta
+          endif
+        endif
+      endif
+      eepdf_tilde_power = k
+      end
+
+c     This function returns the type of component n of parton
+c     "partonid" in beam "beamid": 1 for n=1 when partonid equals
+c     beamid for a supported beam (11, the electron beam, or -11),
+c     0 in every other case.
+      integer function eepdf_tilde_type(n,partonid,beamid)
+      implicit none
+      integer n,partonid,beamid
+      integer res
+
+c     Default type. It also covers beams other than +-11, for which
+c     res used to be left unassigned (undefined function result).
+      res=0
+c     electron beam
+      if (beamid .eq. 11) then
+c     other partons are zero
+        if (partonid .eq. 11) then
+c     only the first component is non-zero
+          if (n .eq. 1) then
+            res=1
+          endif
+        endif
+c     beam with id -11: same treatment with the mirrored parton id
+      else if (beamid .eq. -11) then
+        if (partonid .eq. -11) then
+          if (n .eq. 1) then
+            res=1
+          endif
+        endif
+      endif
+      eepdf_tilde_type = res
+      end
+
+c     This function calculates the factor for the grid implementation
+c     of component n of parton "partonid" in beam "beamid". As coded
+c     here, every branch yields a factor of exactly 1.
+      real*8 function eepdf_tilde_factor(x,Q2,n,partonid,beamid)
+      implicit none
+      real*8 x,Q2
+      real*8 me
+      data me /0.511d-3/
+      real*8 PI
+      real*8 alphaem
+c     In Gmu scheme
+      data alphaem/0.007562397d0/
+      real*8 beta
+      integer n,partonid,beamid
+      real*8 res
+
+      PI=4.D0*DATAN(1.D0)
+      beta = alphaem/PI * (dlog(Q2/me/me))
+c     Default factor. It also covers beams other than +-11, for which
+c     res used to be left unassigned (undefined function result).
+      res = 1d0
+c     electron beam
+      if (beamid .eq. 11) then
+c     other partons keep the default factor
+        if (partonid .eq. 11) then
+          if (n .eq. 1) then
+            res = 1d0
+          else
+            res = 1d0
+          endif
+        endif
+      else if (beamid .eq. -11) then
+        if (partonid .eq. -11) then
+          if (n .eq. 1) then
+            res = 1d0
+          else
+            res = 1d0
+          endif
+        endif
+      endif
+      eepdf_tilde_factor = res
+      end
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/eepdf.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/eepdf.f
new file mode 100644
index 0000000000..e58b845094
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/eepdf.f
@@ -0,0 +1,112 @@
+c     Reduced structure function at energy fraction "x" and scale "Qsquare":
+c     isr_tilde_racoon(x,beta) for n=1 and partonid=beamid=+-13, else 0
+      real*8 function eepdf_tilde(x,Qsquare,n,partonid,beamid)
+      implicit none
+      real*8 x,Qsquare
+      real*8 me
+      data me /0.105658d0/
+      real*8 PI
+      real*8 alphaem
+c     In Gmu scheme
+      data alphaem/0.007562397d0/
+      real*8 beta
+      integer n,partonid,beamid
+      real*8 isr_tilde_racoon
+      real*8 res
+c     res is zeroed at every call below: the former DATA statement set
+c     it only once, so an unsupported beamid returned a stale value.
+      PI=4.D0*DATAN(1.D0)
+      res = 0d0
+      beta = alphaem/PI * (dlog(Qsquare/me/me)-1)
+
+c     muon beam
+      if (beamid .eq. 13) then
+c     other partons are zero
+          if (partonid .ne. 13) then
+             res = 0d0
+          else
+             if (n .eq. 1) then
+                res = isr_tilde_racoon(x,beta)
+             else
+                res = 0d0
+             endif
+          endif
+      else if (beamid .eq. -13) then
+          if (partonid .ne. -13) then
+              res = 0d0
+          else
+             if (n .eq. 1) then
+                res = isr_tilde_racoon(x,beta)
+             else
+                res = 0d0
+             endif
+          endif
+      endif
+      eepdf_tilde = res
+      end
+      
+c     https://arxiv.org/pdf/hep-ph/0302198.pdf, eq.(2.44)
+c     note that beta_e in eq.(2.45) is twice our beta
+c     so eq.(2.44) needs to be corrected by some factor of 2
+      real*8 function isr_tilde_racoon(x,beta)
+      implicit none
+      real*8 x,beta
+      real*8 res
+      real*8 PI
+      real*8 gE
+      real*8 logx, logomx
+      real*8 dlgam,DDILOG
+      external dlgam,DDILOG
+      PI=4.D0*DATAN(1.D0)
+      gE=0.5772156649d0
+      res=0d0
+      if (x .lt. 0.9999999d0) then
+         logx=dlog(x)
+         logomx=dlog(1d0-x)
+c     ----------------------------
+c     order alpha
+         res=-beta*(1d0+x)/2d0
+c     order alpha^2
+         res=res-(beta**2)/8d0*(
+     c        (1d0+3d0*x*x)/(1d0-x)*logx
+     c        +4d0*(1d0+x)*logomx+5d0+x)
+c     order alpha^3
+         res=res-(beta**3)/128d0*(
+     c        (1d0+x)*(6d0*DDILOG(x)+12d0*(logomx**2)-3d0*PI**2)
+     c        +(1d0/(1d0-x)) * (
+     c        (3d0/2d0)*(1d0+8d0*x+3d0*x**2)*logx
+     c        +6d0*(x+5d0)*(1d0-x)*logomx
+     c        +12d0*(1d0+x**2)*logx*logomx
+     c        -1d0/2d0*(1d0+7d0*x**2)*logx**2
+     c        +1d0/4d0*(39d0-24d0*x-15d0*x**2)))
+c     ----------------------------
+         res=res*(1d0-x)**(1-beta)
+      endif
+      res=res+exp(beta*(-gE+3d0/4d0))/exp(dlgam(1d0+beta))*beta
+      isr_tilde_racoon=res
+      end
+
+cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc      
+      function dlgam(xx)
+c real logarithm of gamma function
+      implicit real * 8 (a-h,o-z)
+      real * 8 cof(6),stp,gt,g,cst
+      data cof,stp/76.18009173d0,-86.50532033d0,24.01409822d0,
+     # -1.231739516d0,.120858003d-2,-.536382d-5,2.50662827465d0/
+      data gt,g/5.5d0,5.0d0/
+      data cst/4.081061466d0/
+      x = xx - 1
+      xpgt = x + gt
+      xmp5  = x + .5d0
+      s0 = 1
+      do 1 j=1,6
+        x = x + 1
+        tmp = cof(j)/x
+        s0  = s0 + tmp
+  1     continue
+      r10 = log(s0)
+      dlgam = xmp5*(log(xpgt)-1) + r10 - cst
+      return
+      end      
+
+cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/gridpdfaux.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/gridpdfaux.f
new file mode 100644
index 0000000000..21cc0a4c35
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/gridpdfaux.f
@@ -0,0 +1,140 @@
+      integer function eepdf_n_components(partonid,beamid)
+c     Number of components of parton "partonid" in beam "beamid":
+c     1 when partonid equals beamid for a supported beam (13, the
+c     muon beam, or -13), 0 in every other case.
+      implicit none
+      integer partonid,beamid
+      integer ncom
+c     Default: no component. It also covers beams other than +-13,
+c     for which ncom used to be left unassigned, so that the function
+c     returned an undefined value.
+      ncom=0
+c     muon beam
+      if (beamid .eq. 13) then
+c     other partons are zero
+        if (partonid .eq. 13) ncom=1
+c     beam with id -13: same treatment with the mirrored parton id
+      else if (beamid .eq. -13) then
+        if (partonid .eq. -13) ncom=1
+      endif
+      eepdf_n_components=ncom
+      end
+
+c     This function returns the power of (1-x) for component n of
+c     parton "partonid" in beam "beamid" at scale Q2: it is 1-beta
+c     for n=1 when partonid equals beamid for a supported beam (13,
+c     the muon beam, or -13) and 0 in every other case.
+      real*8 function eepdf_tilde_power(Q2,n,partonid,beamid)
+      implicit none
+      real*8 me
+      data me /0.105658d0/
+      real*8 PI
+      real*8 alphaem
+c     In Gmu scheme
+      data alphaem/0.007562397d0/
+      real*8 beta,eta,Q2
+      integer n,partonid,beamid
+      real*8 k,b
+
+      PI=4.D0*DATAN(1.D0)
+      beta = alphaem/PI * (dlog(Q2/me/me)-1)
+      eta = alphaem/PI * (dlog(Q2/me/me))
+      b=-2.D0/3.D0
+
+c     Default power. It also covers beams other than +-13, for which
+c     k used to be left unassigned (undefined function result).
+      k=0d0
+c     muon beam
+      if (beamid .eq. 13) then
+c     other partons are zero
+        if (partonid .eq. 13) then
+c     only the first component is non-zero
+          if (n .eq. 1) then
+            k=1d0-beta
+          endif
+        endif
+c     beam with id -13: same treatment with the mirrored parton id
+      else if (beamid .eq. -13) then
+        if (partonid .eq. -13) then
+          if (n .eq. 1) then
+            k=1d0-beta
+          endif
+        endif
+      endif
+      eepdf_tilde_power = k
+      end
+
+c     This function returns the type of component n of parton
+c     "partonid" in beam "beamid": 1 for n=1 when partonid equals
+c     beamid for a supported beam (13, the muon beam, or -13),
+c     0 in every other case.
+      integer function eepdf_tilde_type(n,partonid,beamid)
+      implicit none
+      integer n,partonid,beamid
+      integer res
+
+c     Default type. It also covers beams other than +-13, for which
+c     res used to be left unassigned (undefined function result).
+      res=0
+c     muon beam
+      if (beamid .eq. 13) then
+c     other partons are zero
+        if (partonid .eq. 13) then
+c     only the first component is non-zero
+          if (n .eq. 1) then
+            res=1
+          endif
+        endif
+c     beam with id -13: same treatment with the mirrored parton id
+      else if (beamid .eq. -13) then
+        if (partonid .eq. -13) then
+          if (n .eq. 1) then
+            res=1
+          endif
+        endif
+      endif
+      eepdf_tilde_type = res
+      end
+
+c     This function calculates the factor for the grid implementation
+c     of component n of parton "partonid" in beam "beamid". As coded
+c     here, every branch yields a factor of exactly 1.
+      real*8 function eepdf_tilde_factor(x,Q2,n,partonid,beamid)
+      implicit none
+      real*8 x,Q2
+      real*8 me
+      data me /0.105658d0/
+      real*8 PI
+      real*8 alphaem
+c     In Gmu scheme
+      data alphaem/0.007562397d0/
+      real*8 beta
+      integer n,partonid,beamid
+      real*8 res
+
+      PI=4.D0*DATAN(1.D0)
+      beta = alphaem/PI * (dlog(Q2/me/me))
+c     Default factor. It also covers beams other than +-13, for which
+c     res used to be left unassigned (undefined function result).
+      res = 1d0
+c     muon beam
+      if (beamid .eq. 13) then
+c     other partons keep the default factor
+        if (partonid .eq. 13) then
+          if (n .eq. 1) then
+            res = 1d0
+          else
+            res = 1d0
+          endif
+        endif
+      else if (beamid .eq. -13) then
+        if (partonid .eq. -13) then
+          if (n .eq. 1) then
+            res = 1d0
+          else
+            res = 1d0
+          endif
+        endif
+      endif
+      eepdf_tilde_factor = res
+      end
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/info b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/info
new file mode 100644
index 0000000000..53e674c5ff
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/lep_densities/isrbetmll/info
@@ -0,0 +1 @@
+identity: -13,13
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/HelAmps.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/HelAmps.cc
new file mode 100644
index 0000000000..c69879b06a
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/HelAmps.cc
@@ -0,0 +1,273 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifdef MGONGPU_LINKER_HELAMPS
+
+#include "HelAmps_sm.h"
+
+// -----------------------------------------------------------------------------
+// *** NB: this implementation class depends on MemoryAccessMomenta,
+// *** where the AOSOA definition depends on CPPProcess::npar,
+// *** which may be different in different P* subprocess directories:
+// *** therefore this class is presently hosted and compiled in each P*
+// -----------------------------------------------------------------------------
+
+#include "MemoryAccessAmplitudes.h"
+#include "MemoryAccessCouplings.h"
+#include "MemoryAccessCouplingsFixed.h"
+#include "MemoryAccessGs.h"
+#include "MemoryAccessMatrixElements.h"
+#include "MemoryAccessMomenta.h"
+#include "MemoryAccessWavefunctions.h"
+
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+#include "MemoryAccessDenominators.h"
+#include "MemoryAccessNumerators.h"
+#endif
+
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  //--------------------------------------------------------------------------
+
+#ifdef MGONGPUCPP_GPUIMPL
+  using M_ACCESS = DeviceAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = DeviceAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = DeviceAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = DeviceAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = DeviceAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = DeviceAccessNumerators;   // non-trivial access: buffer includes all events
+  using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
+#endif
+#else
+  using namespace ::mg5amcCpu;
+  using M_ACCESS = HostAccessMomenta;         // non-trivial access: buffer includes all events
+  using E_ACCESS = HostAccessMatrixElements;  // non-trivial access: buffer includes all events
+  using W_ACCESS = HostAccessWavefunctions;   // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using A_ACCESS = HostAccessAmplitudes;      // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
+  using CD_ACCESS = HostAccessCouplings;      // non-trivial access (dependent couplings): buffer includes all events
+  using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+  using NUM_ACCESS = HostAccessNumerators;    // non-trivial access: buffer includes all events
+  using DEN_ACCESS = HostAccessDenominators;  // non-trivial access: buffer includes all events
+#endif
+#endif
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CD_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_0( const fptype allF1[],
+                    const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    fptype allvertexes[] )
+  {
+    return FFV1_0<W_ACCESS, A_ACCESS, CI_ACCESS>( allF1, allF2, allV3, allCOUP, Ccoeff, allvertexes );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV1_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV1_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CD_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV1P0_3( const fptype allF1[],
+                      const fptype allF2[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M3,
+                      const fptype W3,
+                      fptype allV3[] )
+  {
+    return FFV1P0_3<W_ACCESS, CI_ACCESS>( allF1, allF2, allCOUP, Ccoeff, M3, W3, allV3 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV2_1<W_ACCESS, CD_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_1( const fptype allF2[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M1,
+                    const fptype W1,
+                    fptype allF1[] )
+  {
+    return FFV2_1<W_ACCESS, CI_ACCESS>( allF2, allV3, allCOUP, Ccoeff, M1, W1, allF1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_FFV2_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV2_2<W_ACCESS, CD_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_FFV2_2( const fptype allF1[],
+                    const fptype allV3[],
+                    const fptype allCOUP[],
+                    const double Ccoeff,
+                    const fptype M2,
+                    const fptype W2,
+                    fptype allF2[] )
+  {
+    return FFV2_2<W_ACCESS, CI_ACCESS>( allF1, allV3, allCOUP, Ccoeff, M2, W2, allF2 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (dependent couplings)
+  __device__ void
+  linker_CD_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CD_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+
+  // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] (independent couplings)
+  __device__ void
+  linker_CI_VVV1P0_1( const fptype allV2[],
+                      const fptype allV3[],
+                      const fptype allCOUP[],
+                      const double Ccoeff,
+                      const fptype M1,
+                      const fptype W1,
+                      fptype allV1[] )
+  {
+    return VVV1P0_1<W_ACCESS, CI_ACCESS>( allV2, allV3, allCOUP, Ccoeff, M1, W1, allV1 );
+  }
+
+  //--------------------------------------------------------------------------
+}
+#endif
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/HelAmps.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/HelAmps.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/HelAmps.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/HelAmps.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/HelAmps.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/HelAmps.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/HelAmps.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/HelAmps.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/HelAmps.cc
new file mode 120000
index 0000000000..461bb9d6ff
--- /dev/null
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/HelAmps.cc
@@ -0,0 +1 @@
+../HelAmps.cc
\ No newline at end of file
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
index 86faba3ce9..1d8855f6c4 100644
--- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
+++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.124648332595825)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +56,7 @@ set zerowidth_tchannel F
 define j = p
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005530357360839844 [0m
+[1;32mDEBUG: model prefixing  takes 0.0058557987213134766 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -168,7 +167,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~
 INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ 
 INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ 
 INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ 
-5 processes with 7 diagrams generated in 0.028 s
+5 processes with 7 diagrams generated in 0.030 s
 Total: 5 processes with 7 diagrams
 add process p p > t t~ j @1
 INFO: Checking for minimal orders which gives processes. 
@@ -208,7 +207,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~
 INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g 
 INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ 
 INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g 
-13 processes with 76 diagrams generated in 0.135 s
+13 processes with 76 diagrams generated in 0.141 s
 Total: 18 processes with 83 diagrams
 add process p p > t t~ j j @2
 INFO: Checking for minimal orders which gives processes. 
@@ -374,15 +373,21 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~
 INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ 
 INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ 
 INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.822 s
+65 processes with 1119 diagrams generated in 1.889 s
 Total: 83 processes with 1202 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_pp_tt012j 
@@ -692,8 +697,8 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx
 [1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1604][0m [0m
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 18 subprocesses (372 diagrams) in 1.284 s
-Wrote files for 810 helas calls in 2.748 s
+Generated helas calls for 18 subprocesses (372 diagrams) in 1.328 s
+Wrote files for 810 helas calls in 2.853 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -701,14 +706,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.339 s
+ALOHA: aloha creates 5 routines in  0.346 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.313 s
+ALOHA: aloha creates 10 routines in  0.324 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -888,10 +893,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m13.421s
-user	0m9.432s
-sys	0m0.958s
-Code generation completed in 14 seconds
+real	0m10.783s
+user	0m9.774s
+sys	0m0.974s
+Code generation completed in 11 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
index 6053b81b8f..1cbc864422 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.127755641937256)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -73,7 +72,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.13721013069152832 [0m
+[1;32mDEBUG: model prefixing  takes 0.14407634735107422 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -88,15 +87,21 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.743 s
+1 processes with 72 diagrams generated in 3.830 s
 Total: 1 processes with 72 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_smeft_gg_tttt 
@@ -120,8 +125,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx
 [1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1604][0m [0m
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.187 s
-Wrote files for 119 helas calls in 0.385 s
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.194 s
+Wrote files for 119 helas calls in 0.403 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
@@ -129,14 +134,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.317 s
+ALOHA: aloha creates 5 routines in  0.328 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 10 routines in  0.328 s
+ALOHA: aloha creates 10 routines in  0.343 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -179,10 +184,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m10.174s
-user	0m6.874s
-sys	0m0.284s
-Code generation completed in 10 seconds
+real	0m7.380s
+user	0m7.061s
+sys	0m0.302s
+Code generation completed in 8 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
index 9be022da94..3499d26115 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.124478340148926)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -73,7 +72,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.13663244247436523 [0m
+[1;32mDEBUG: model prefixing  takes 0.14396023750305176 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -88,9 +87,11 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.699 s
+1 processes with 72 diagrams generated in 3.828 s
 Total: 1 processes with 72 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
@@ -110,14 +111,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. 
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.186 s
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.192 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.317 s
+ALOHA: aloha creates 5 routines in  0.327 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -139,7 +140,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
 quit
 
-real	0m8.078s
-user	0m4.991s
-sys	0m0.067s
-Code generation completed in 8 seconds
+real	0m5.681s
+user	0m5.176s
+sys	0m0.063s
+Code generation completed in 6 seconds
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
index 1e8f1c0d96..f16a211807 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.1275925636291504)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -550,15 +549,21 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.122 s
+1 processes with 6 diagrams generated in 0.127 s
 Total: 1 processes with 6 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_susy_gg_t1t1 
@@ -583,18 +588,18 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1629][0m [0m
 Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s
-Wrote files for 16 helas calls in 0.083 s
+Wrote files for 16 helas calls in 0.084 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.184 s
+ALOHA: aloha creates 3 routines in  0.188 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 6 routines in  0.180 s
+ALOHA: aloha creates 6 routines in  0.186 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
@@ -633,10 +638,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m5.957s
-user	0m2.652s
-sys	0m0.302s
-Code generation completed in 6 seconds
+real	0m3.170s
+user	0m2.747s
+sys	0m0.300s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
index 19a94c3e16..bf486a39f2 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.128488302230835)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -550,9 +549,11 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.123 s
+1 processes with 6 diagrams generated in 0.127 s
 Total: 1 processes with 6 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
@@ -577,7 +578,7 @@ ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.185 s
+ALOHA: aloha creates 3 routines in  0.192 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
@@ -595,7 +596,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
 quit
 
-real	0m4.349s
-user	0m1.276s
-sys	0m0.068s
-Code generation completed in 4 seconds
+real	0m1.498s
+user	0m1.289s
+sys	0m0.074s
+Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
index a9ec510ec7..1a997e0de0 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.125643014907837)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -550,15 +549,21 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.118 s
+1 processes with 3 diagrams generated in 0.123 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_susy_gg_tt 
@@ -583,16 +588,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx
 [1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
 [1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.074 s
+Wrote files for 10 helas calls in 0.077 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.137 s
+ALOHA: aloha creates 2 routines in  0.140 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.133 s
+ALOHA: aloha creates 4 routines in  0.136 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -630,10 +635,10 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m5.828s
-user	0m2.532s
-sys	0m0.295s
-Code generation completed in 6 seconds
+real	0m2.967s
+user	0m2.605s
+sys	0m0.308s
+Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries
diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
index 82229a9c08..0f424b45da 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
@@ -2,7 +2,6 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
-('WARNING: loading of madgraph too slow!!!', 3.126711368560791)
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -57,7 +56,7 @@ set zerowidth_tchannel F
 import model MSSM_SLHA2
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.8969454765319824 [0m
+[1;32mDEBUG: model prefixing  takes 0.9416484832763672 [0m
 INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . 
 INFO: Detect SLHA2 format. keeping restricted parameter in the param_card 
 [1;32mDEBUG: Simplifying conditional expressions [0m
@@ -553,9 +552,11 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.110 s
+1 processes with 3 diagrams generated in 0.112 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt
+[1;34merror detected in plugin: maddm.[0m
+[1;34mNo module named 'maddm_interface'[0m
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
@@ -579,7 +580,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.135 s
+ALOHA: aloha creates 2 routines in  0.142 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -596,7 +597,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
 quit
 
-real	0m5.323s
-user	0m2.250s
+real	0m2.450s
+user	0m2.347s
 sys	0m0.065s
-Code generation completed in 5 seconds
+Code generation completed in 2 seconds
diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk
index bd9ad881b3..40920b4e36 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk
@@ -795,8 +795,13 @@ endif
 ifeq ($(HELINL),L)
 cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
 gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
+ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
 $(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
 $(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
+else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build
+$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -fgpu-rdc # compilation fails if this is not added (lld: error: undefined hidden symbol: mg5amcGpu::linker_CD_FFV1_0)
+$(gpu_checkmain): LIBFLAGS += -fgpu-rdc --hip-link
+endif
 endif
 
 # Target (and build rules): C++ and CUDA/HIP shared libraries

From c4ec5df66b0b2f9b0bfece4c8616f6392bc94f6a Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Sat, 5 Oct 2024 08:16:18 +0200
Subject: [PATCH 50/50] [helas] move to CODEGEN logs from the latest
 upstream/master for easier merging

git checkout upstream/master $(git ls-tree --name-only upstream/master */CODEGEN*txt)
---
 .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt   |  58 ++--
 .../CODEGEN_cudacpp_ee_mumu_log.txt           |  48 ++--
 .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt       |  54 ++--
 .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt    |  44 ++-
 .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt |  74 +++--
 .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt     |  60 ++--
 .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt  |  48 ++--
 .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt   |  60 ++--
 .../CODEGEN_cudacpp_gg_ttgg_log.txt           |  48 ++--
 .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt |  62 ++--
 .../CODEGEN_cudacpp_gg_ttggg_log.txt          |  50 ++--
 .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt     |  72 ++---
 .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt  |  64 ++---
 .../CODEGEN_mad_heft_gg_bb_log.txt            |  58 ++--
 .../CODEGEN_cudacpp_heft_gg_bb_log.txt        |  49 ++--
 .../CODEGEN_mad_nobm_pp_ttW_log.txt           | 148 +++++-----
 .../CODEGEN_mad_pp_tt012j_log.txt             | 270 +++++++++---------
 .../CODEGEN_mad_smeft_gg_tttt_log.txt         |  62 ++--
 .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt     |  50 ++--
 .../CODEGEN_mad_susy_gg_t1t1_log.txt          |  58 ++--
 .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt      |  44 ++-
 .../CODEGEN_mad_susy_gg_tt_log.txt            |  56 ++--
 .../CODEGEN_cudacpp_susy_gg_tt_log.txt        |  47 ++-
 23 files changed, 693 insertions(+), 891 deletions(-)

diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
index 848d3bff3d..30d3ffc088 100644
--- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.00563359260559082 [0m
+[1;32mDEBUG: model prefixing  takes 0.006434440612792969 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -152,23 +153,14 @@ INFO: Process has 2 diagrams
 1 processes with 2 diagrams generated in 0.004 s
 Total: 1 processes with 2 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_ee_mumu 
 INFO: remove old information in CODEGEN_mad_ee_mumu 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards [0m
@@ -177,30 +169,30 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Processing color information for process: e+ e- > mu+ mu- @1 
 INFO: Creating files in directory P1_epem_mupmum 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group epem_mupmum 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1552][0m [0m
 Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
-Wrote files for 8 helas calls in 0.072 s
+Wrote files for 8 helas calls in 0.069 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
-ALOHA: aloha creates 3 routines in  0.206 s
+ALOHA: aloha creates 3 routines in  0.201 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 7 routines in  0.263 s
+ALOHA: aloha creates 7 routines in  0.255 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -209,14 +201,12 @@ ALOHA: aloha creates 7 routines in  0.263 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV4
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -235,17 +225,17 @@ Hunk #2 succeeded at 280 (offset 8 lines).
 Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #2 succeeded at 236 (offset 22 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.125s
-user	0m1.839s
-sys	0m0.259s
-Code generation completed in 3 seconds
+real	0m2.097s
+user	0m1.775s
+sys	0m0.272s
+Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
@@ -258,7 +248,7 @@ Code generation completed in 3 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -288,7 +278,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
index fcbb22f8af..1858165757 100644
--- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
+++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.006380796432495117 [0m
+[1;32mDEBUG: model prefixing  takes 0.0062215328216552734 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -149,37 +150,32 @@ INFO: Checking for minimal orders which gives processes.
 INFO: Please specify coupling orders to bypass this step. 
 INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1  
 INFO: Process has 2 diagrams 
-1 processes with 2 diagrams generated in 0.004 s
+1 processes with 2 diagrams generated in 0.005 s
 Total: 1 processes with 2 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Processing color information for process: e+ e- > mu+ mu- @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
+Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 4 routines in  0.277 s
+ALOHA: aloha creates 4 routines in  0.267 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -188,19 +184,17 @@ ALOHA: aloha creates 4 routines in  0.277 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV4
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
 quit
 
-real	0m0.911s
-user	0m0.709s
-sys	0m0.061s
+real	0m0.781s
+user	0m0.590s
+sys	0m0.053s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
index b3fdcd0b2d..0384ed0547 100644
--- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005700588226318359 [0m
+[1;32mDEBUG: model prefixing  takes 0.0059719085693359375 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -153,23 +154,14 @@ INFO: Process has 3 diagrams
 1 processes with 3 diagrams generated in 0.008 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_tt 
 INFO: remove old information in CODEGEN_mad_gg_tt 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards [0m
@@ -178,18 +170,18 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1552][0m [0m
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.074 s
+Wrote files for 10 helas calls in 0.072 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
@@ -198,19 +190,17 @@ ALOHA: aloha creates 2 routines in  0.150 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.137 s
+ALOHA: aloha creates 4 routines in  0.134 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -229,16 +219,16 @@ Hunk #2 succeeded at 280 (offset 8 lines).
 Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #2 succeeded at 227 (offset 13 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m1.934s
-user	0m1.651s
-sys	0m0.280s
+real	0m1.997s
+user	0m1.613s
+sys	0m0.278s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
@@ -252,7 +242,7 @@ Code generation completed in 2 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -282,7 +272,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
index 323ed49835..ada2d7b4a3 100644
--- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
+++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005724668502807617 [0m
+[1;32mDEBUG: model prefixing  takes 0.006254673004150391 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -153,49 +154,42 @@ INFO: Process has 3 diagrams
 1 processes with 3 diagrams generated in 0.008 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.146 s
+ALOHA: aloha creates 2 routines in  0.144 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
 quit
 
-real	0m0.550s
-user	0m0.490s
-sys	0m0.047s
+real	0m0.532s
+user	0m0.478s
+sys	0m0.045s
 Code generation completed in 0 seconds
diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
index 8dc238a8ca..3922a1c111 100644
--- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
+++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0057926177978515625 [0m
+[1;32mDEBUG: model prefixing  takes 0.006289482116699219 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -158,26 +159,17 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.020 s
+1 processes with 16 diagrams generated in 0.019 s
 Total: 2 processes with 19 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_tt01g 
 INFO: remove old information in CODEGEN_mad_gg_tt01g 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards [0m
@@ -188,29 +180,29 @@ INFO: Processing color information for process: g g > t t~ g @2
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P2_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s
-Wrote files for 46 helas calls in 0.192 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1552][0m [0m
+Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s
+Wrote files for 46 helas calls in 0.189 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -218,14 +210,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.335 s
+ALOHA: aloha creates 5 routines in  0.338 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.320 s
+ALOHA: aloha creates 10 routines in  0.311 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -235,14 +227,12 @@ ALOHA: aloha creates 10 routines in  0.320 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV3
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -268,17 +258,17 @@ Hunk #2 succeeded at 280 (offset 8 lines).
 Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #2 succeeded at 243 (offset 29 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.649s
-user	0m2.334s
-sys	0m0.313s
-Code generation completed in 3 seconds
+real	0m2.618s
+user	0m2.304s
+sys	0m0.310s
+Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
@@ -291,7 +281,7 @@ Code generation completed in 3 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -321,7 +311,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
index 7b03a55987..871e6fde69 100644
--- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005819559097290039 [0m
+[1;32mDEBUG: model prefixing  takes 0.0062618255615234375 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -153,23 +154,14 @@ INFO: Process has 16 diagrams
 1 processes with 16 diagrams generated in 0.022 s
 Total: 1 processes with 16 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_ttg 
 INFO: remove old information in CODEGEN_mad_gg_ttg 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards [0m
@@ -178,18 +170,18 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Processing color information for process: g g > t t~ g @1 
 INFO: Creating files in directory P1_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s
-Wrote files for 36 helas calls in 0.125 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1552][0m [0m
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s
+Wrote files for 36 helas calls in 0.123 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -197,14 +189,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.335 s
+ALOHA: aloha creates 5 routines in  1.397 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 10 routines in  0.320 s
+ALOHA: aloha creates 10 routines in  0.315 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -214,14 +206,12 @@ ALOHA: aloha creates 10 routines in  0.320 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV3
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -240,17 +230,17 @@ Hunk #2 succeeded at 280 (offset 8 lines).
 Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #2 succeeded at 243 (offset 29 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.650s
-user	0m2.229s
-sys	0m0.268s
-Code generation completed in 3 seconds
+real	0m3.568s
+user	0m2.185s
+sys	0m0.276s
+Code generation completed in 4 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
@@ -263,7 +253,7 @@ Code generation completed in 3 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -293,7 +283,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
index 00557adee8..c0f0ecac53 100644
--- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
+++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate g g > t t~ g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005669355392456055 [0m
+[1;32mDEBUG: model prefixing  takes 0.006242036819458008 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,38 +151,33 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1  
 INFO: Process has 16 diagrams 
-1 processes with 16 diagrams generated in 0.023 s
+1 processes with 16 diagrams generated in 0.021 s
 Total: 1 processes with 16 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Processing color information for process: g g > t t~ g @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV3 set of routines with options: P0[0m
 ALOHA: aloha creates VVVV4 set of routines with options: P0[0m
-ALOHA: aloha creates 5 routines in  0.335 s
+ALOHA: aloha creates 5 routines in  0.326 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -191,19 +187,17 @@ ALOHA: aloha creates 5 routines in  0.335 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV3
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
 quit
 
-real	0m0.816s
-user	0m0.740s
-sys	0m0.055s
+real	0m0.777s
+user	0m0.714s
+sys	0m0.058s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
index 485a143814..20192cdf8a 100644
--- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005662202835083008 [0m
+[1;32mDEBUG: model prefixing  takes 0.006398916244506836 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,26 +151,17 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.163 s
+1 processes with 123 diagrams generated in 0.160 s
 Total: 1 processes with 123 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_ttgg 
 INFO: remove old information in CODEGEN_mad_gg_ttgg 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards [0m
@@ -178,18 +170,18 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ g g @1 
 INFO: Creating files in directory P1_gg_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s
-Wrote files for 222 helas calls in 0.687 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1552][0m [0m
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.426 s
+Wrote files for 222 helas calls in 0.660 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -197,14 +189,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.344 s
+ALOHA: aloha creates 5 routines in  0.331 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.324 s
+ALOHA: aloha creates 10 routines in  0.314 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -217,14 +209,12 @@ ALOHA: aloha creates 10 routines in  0.324 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV3
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -243,16 +233,16 @@ Hunk #2 succeeded at 280 (offset 8 lines).
 Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #2 succeeded at 275 (offset 61 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.907s
-user	0m3.612s
-sys	0m0.284s
+real	0m3.856s
+user	0m3.505s
+sys	0m0.295s
 Code generation completed in 4 seconds
 ************************************************************
 *                                                          *
@@ -266,7 +256,7 @@ Code generation completed in 4 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -296,7 +286,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
index 2f2d518acf..641c68b009 100644
--- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
+++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate g g > t t~ g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005728960037231445 [0m
+[1;32mDEBUG: model prefixing  takes 0.006249666213989258 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,38 +151,33 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1  
 INFO: Process has 123 diagrams 
-1 processes with 123 diagrams generated in 0.164 s
+1 processes with 123 diagrams generated in 0.160 s
 Total: 1 processes with 123 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ g g @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.437 s
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.427 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.330 s
+ALOHA: aloha creates 5 routines in  0.325 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -194,19 +190,17 @@ ALOHA: aloha creates 5 routines in  0.330 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV3
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
 quit
 
-real	0m1.492s
-user	0m1.425s
-sys	0m0.049s
+real	0m1.529s
+user	0m1.382s
+sys	0m0.063s
 Code generation completed in 2 seconds
diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
index faa5e0d672..4e8f48ed8b 100644
--- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0056841373443603516 [0m
+[1;32mDEBUG: model prefixing  takes 0.00632476806640625 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,26 +151,17 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.950 s
+1 processes with 1240 diagrams generated in 1.858 s
 Total: 1 processes with 1240 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gg_ttggg 
 INFO: remove old information in CODEGEN_mad_gg_ttggg 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards [0m
@@ -180,18 +172,18 @@ INFO: Processing color information for process: g g > t t~ g g g @1
 INFO: Creating files in directory P1_gg_ttxggg 
 INFO: Computing Color-Flow optimization [15120 term] 
 INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.769 s
-Wrote files for 2281 helas calls in 18.956 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 945 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 
252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 
485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 
719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 
958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 
216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 
416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 
616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 
816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [1;30m[model_handling.py at line 1552][0m [0m
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.525 s
+Wrote files for 2281 helas calls in 18.363 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -199,14 +191,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.375 s
+ALOHA: aloha creates 5 routines in  0.361 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.325 s
+ALOHA: aloha creates 10 routines in  0.312 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -219,14 +211,12 @@ ALOHA: aloha creates 10 routines in  0.325 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV3
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -245,17 +235,17 @@ Hunk #2 succeeded at 280 (offset 8 lines).
 Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #2 succeeded at 339 (offset 125 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m33.613s
-user	0m33.028s
-sys	0m0.467s
-Code generation completed in 34 seconds
+real	0m32.585s
+user	0m32.009s
+sys	0m0.446s
+Code generation completed in 33 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
@@ -268,7 +258,7 @@ Code generation completed in 34 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -298,7 +288,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
index a36d10f3c3..c4b2d61a21 100644
--- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
+++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,7 +58,7 @@ generate g g > t t~ g g g
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005714893341064453 [0m
+[1;32mDEBUG: model prefixing  takes 0.006146430969238281 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -150,38 +151,33 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1  
 INFO: Process has 1240 diagrams 
-1 processes with 1240 diagrams generated in 1.936 s
+1 processes with 1240 diagrams generated in 1.893 s
 Total: 1 processes with 1240 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 
 INFO: Processing color information for process: g g > t t~ g g g @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.780 s
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.631 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.360 s
+ALOHA: aloha creates 5 routines in  0.351 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -194,19 +190,17 @@ ALOHA: aloha creates 5 routines in  0.360 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV3
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
 quit
 
-real	0m13.504s
-user	0m13.319s
-sys	0m0.105s
-Code generation completed in 13 seconds
+real	0m13.234s
+user	0m12.950s
+sys	0m0.101s
+Code generation completed in 14 seconds
diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
index 4d28461afe..b3ce4a6716 100644
--- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -56,7 +57,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005707502365112305 [0m
+[1;32mDEBUG: model prefixing  takes 0.0063931941986083984 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -165,26 +166,17 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.081 s
+8 processes with 40 diagrams generated in 0.078 s
 Total: 8 processes with 40 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_gq_ttq 
 INFO: remove old information in CODEGEN_mad_gq_ttq 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards [0m
@@ -201,51 +193,49 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~
 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Creating files in directory P1_gu_ttxu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxu 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_gux_ttxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s
-Wrote files for 32 helas calls in 0.169 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1552][0m [0m
+Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s
+Wrote files for 32 helas calls in 0.163 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.149 s
+ALOHA: aloha creates 2 routines in  0.145 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 4 routines in  0.138 s
+ALOHA: aloha creates 4 routines in  0.132 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -273,16 +263,16 @@ Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #1 succeeded at 74 (offset 3 lines).
 Hunk #2 succeeded at 254 (offset 40 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.255s
-user	0m1.967s
-sys	0m0.282s
+real	0m2.176s
+user	0m1.872s
+sys	0m0.303s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
@@ -296,7 +286,7 @@ Code generation completed in 2 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -326,7 +316,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
index c13cc9fd71..6483e0d003 100644
--- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
+++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -56,7 +57,7 @@ set zerowidth_tchannel F
 define q = u c d s u~ c~ d~ s~
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005742549896240234 [0m
+[1;32mDEBUG: model prefixing  takes 0.006114006042480469 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -165,17 +166,12 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams.
 INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
 INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
 INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.082 s
+8 processes with 40 diagrams generated in 0.077 s
 Total: 8 processes with 40 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 
@@ -188,47 +184,45 @@ INFO: Processing color information for process: g u~ > t t~ u~ @1
 INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=1 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=1 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s
+Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.149 s
+ALOHA: aloha creates 2 routines in  0.146 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
 quit
 
-real	0m0.677s
-user	0m0.609s
-sys	0m0.053s
-Code generation completed in 1 seconds
+real	0m0.934s
+user	0m0.600s
+sys	0m0.050s
+Code generation completed in 0 seconds
diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
index 2dd3979088..0ae7218027 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -122,26 +123,17 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~
 generate g g > b b~ HIW<=1
 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1  
 INFO: Process has 4 diagrams 
-1 processes with 4 diagrams generated in 0.006 s
+1 processes with 4 diagrams generated in 0.005 s
 Total: 1 processes with 4 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_heft_gg_bb 
 INFO: remove old information in CODEGEN_mad_heft_gg_bb 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards [0m
@@ -150,45 +142,43 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Creating files in directory P1_gg_bbx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_bbx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 4 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4} [1;30m[model_handling.py at line 1552][0m [0m
 Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s
-Wrote files for 12 helas calls in 0.078 s
+Wrote files for 12 helas calls in 0.076 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.272 s
+ALOHA: aloha creates 4 routines in  0.264 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 8 routines in  0.257 s
+ALOHA: aloha creates 8 routines in  0.249 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFS2
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h
 INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.cc
 INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -207,17 +197,17 @@ Hunk #2 succeeded at 280 (offset 8 lines).
 Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #2 succeeded at 227 (offset 13 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.127s
-user	0m1.903s
-sys	0m0.292s
-Code generation completed in 3 seconds
+real	0m2.343s
+user	0m1.855s
+sys	0m0.277s
+Code generation completed in 2 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
@@ -230,7 +220,7 @@ Code generation completed in 3 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -260,7 +250,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
index 5f4a381d59..78ac3c603d 100644
--- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
+++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -57,11 +58,6 @@ set auto_convert_model T
 save options auto_convert_model
 save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt
 import model heft
-INFO: load particles 
-INFO: load vertices 
-[1;34mWARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model. [0m
-[1;34mWARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model. [0m
-[1;32mDEBUG: model prefixing  takes 0.006167888641357422 [0m
 INFO: Restrict model heft with file models/heft/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: s u w+ at order: QED=1 [0m
@@ -130,26 +126,21 @@ INFO: Process has 4 diagrams
 1 processes with 4 diagrams generated in 0.006 s
 Total: 1 processes with 4 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_bb
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 
 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. 
 Generated helas calls for 1 subprocesses (4 diagrams) in 0.008 s
 ALOHA: aloha starts to compute helicity amplitudes
@@ -157,26 +148,24 @@ ALOHA: aloha creates VVS3 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFS2 routines[0m
-ALOHA: aloha creates 4 routines in  0.270 s
+ALOHA: aloha creates 4 routines in  0.260 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVS3
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFS2
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h
 INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.cc
 INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
 quit
 
-real	0m0.685s
-user	0m0.620s
-sys	0m0.055s
-Code generation completed in 0 seconds
+real	0m1.090s
+user	0m0.571s
+sys	0m0.062s
+Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt
index 5649c9490d..1f74eb715f 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -56,7 +57,7 @@ set zerowidth_tchannel F
 import model sm-no_b_mass
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.005677461624145508 [0m
+[1;32mDEBUG: model prefixing  takes 0.006134510040283203 [0m
 INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -180,7 +181,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w-
 INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- 
 INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ 
 INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ 
-4 processes with 8 diagrams generated in 0.112 s
+4 processes with 8 diagrams generated in 0.106 s
 Total: 4 processes with 8 diagrams
 add process p p > t t~ w j @1
 INFO: Checking for minimal orders which gives processes. 
@@ -222,26 +223,17 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~
 INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g 
 INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ 
 INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g 
-12 processes with 144 diagrams generated in 0.663 s
+12 processes with 144 diagrams generated in 0.648 s
 Total: 16 processes with 152 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_nobm_pp_ttW 
 INFO: remove old information in CODEGEN_mad_nobm_pp_ttW 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards [0m
@@ -272,106 +264,106 @@ INFO: Generating Helas calls for process: d u~ > t t~ w- WEIGHTED<=4
 INFO: Reusing existing color information for process: d u~ > t t~ w- 
 INFO: Combined process s c~ > t t~ w- WEIGHTED<=4 with process d u~ > t t~ w- WEIGHTED<=4 
 INFO: Creating files in directory P1_gu_ttxwpd 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u > t t~ w+ d WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxwpd 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_gd_ttxwmu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g d > t t~ w- u WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gd_ttxwmu 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_gux_ttxwmdx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ w- d~ WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxwmdx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_gdx_ttxwpux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g d~ > t t~ w+ u~ WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group gdx_ttxwpux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_udx_ttxwpg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u d~ > t t~ w+ g WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group udx_ttxwpg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_dux_ttxwmg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: d u~ > t t~ w- g WEIGHTED<=5 @1 
 INFO: Finding symmetric diagrams for subprocess group dux_ttxwmg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 12 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P0_udx_ttxwp 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u d~ > t t~ w+ WEIGHTED<=4 
 INFO: Finding symmetric diagrams for subprocess group udx_ttxwp 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P0_dux_ttxwm 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: d u~ > t t~ w- WEIGHTED<=4 
 INFO: Finding symmetric diagrams for subprocess group dux_ttxwm 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 8 subprocesses (76 diagrams) in 0.209 s
-Wrote files for 212 helas calls in 0.849 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 2 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2} [1;30m[model_handling.py at line 1552][0m [0m
+Generated helas calls for 8 subprocesses (76 diagrams) in 0.201 s
+Wrote files for 212 helas calls in 0.838 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
-ALOHA: aloha creates 3 routines in  0.210 s
+ALOHA: aloha creates 3 routines in  0.203 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
-ALOHA: aloha creates 6 routines in  0.207 s
+ALOHA: aloha creates 6 routines in  0.200 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -379,14 +371,12 @@ ALOHA: aloha creates 6 routines in  0.207 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./HelAmps_sm_no_b_mass.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./HelAmps_sm_no_b_mass.h
 INFO: Created file HelAmps_sm_no_b_mass.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.cc
 INFO: Created files Parameters_sm_no_b_mass.h and Parameters_sm_no_b_mass.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -462,17 +452,17 @@ Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #1 succeeded at 72 (offset 1 line).
 Hunk #2 succeeded at 316 (offset 102 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m4.776s
-user	0m4.184s
-sys	0m0.578s
-Code generation completed in 5 seconds
+real	0m4.785s
+user	0m4.104s
+sys	0m0.539s
+Code generation completed in 4 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
@@ -485,7 +475,7 @@ Code generation completed in 5 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -515,7 +505,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
index 1d8855f6c4..4c20a350e7 100644
--- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
+++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -56,7 +57,7 @@ set zerowidth_tchannel F
 define j = p
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0058557987213134766 [0m
+[1;32mDEBUG: model prefixing  takes 0.006647348403930664 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -167,7 +168,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~
 INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ 
 INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ 
 INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ 
-5 processes with 7 diagrams generated in 0.030 s
+5 processes with 7 diagrams generated in 0.031 s
 Total: 5 processes with 7 diagrams
 add process p p > t t~ j @1
 INFO: Checking for minimal orders which gives processes. 
@@ -207,7 +208,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~
 INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g 
 INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ 
 INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g 
-13 processes with 76 diagrams generated in 0.141 s
+13 processes with 76 diagrams generated in 0.146 s
 Total: 18 processes with 83 diagrams
 add process p p > t t~ j j @2
 INFO: Checking for minimal orders which gives processes. 
@@ -373,26 +374,17 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~
 INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ 
 INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ 
 INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.889 s
+65 processes with 1119 diagrams generated in 1.946 s
 Total: 83 processes with 1202 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_pp_tt012j 
 INFO: remove old information in CODEGEN_mad_pp_tt012j 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards [0m
@@ -500,205 +492,205 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED
 INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 
 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 
 INFO: Creating files in directory P2_gg_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 105 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_gg_ttxuux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_gu_ttxgu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_gux_ttxgux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_uux_ttxgg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 35 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_gg_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 15 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_uu_ttxuu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_uux_ttxuux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_uxux_ttxuxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 14 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_uc_ttxuc 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_uux_ttxccx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_ucx_ttxucx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P2_uxcx_ttxuxcx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 
 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 7 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_gu_ttxu 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gu_ttxu 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_gux_ttxux 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group gux_ttxux 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P1_uux_ttxg 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 
 INFO: Finding symmetric diagrams for subprocess group uux_ttxg 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P0_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1552][0m [0m
 INFO: Creating files in directory P0_uux_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 
 INFO: Finding symmetric diagrams for subprocess group uux_ttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 18 subprocesses (372 diagrams) in 1.328 s
-Wrote files for 810 helas calls in 2.853 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 1 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1} [1;30m[model_handling.py at line 1552][0m [0m
+Generated helas calls for 18 subprocesses (372 diagrams) in 1.306 s
+Wrote files for 810 helas calls in 2.776 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
@@ -706,14 +698,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 5 routines in  0.346 s
+ALOHA: aloha creates 5 routines in  0.338 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV3 routines[0m
 ALOHA: aloha creates VVVV4 routines[0m
-ALOHA: aloha creates 10 routines in  0.324 s
+ALOHA: aloha creates 10 routines in  0.314 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -726,14 +718,12 @@ ALOHA: aloha creates 10 routines in  0.324 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV3
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV4
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h
 INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc
 INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -886,17 +876,17 @@ Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #1 succeeded at 74 (offset 3 lines).
 Hunk #2 succeeded at 278 (offset 64 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m10.783s
-user	0m9.774s
-sys	0m0.974s
-Code generation completed in 11 seconds
+real	0m10.704s
+user	0m9.729s
+sys	0m0.940s
+Code generation completed in 10 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
@@ -909,7 +899,7 @@ Code generation completed in 11 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -939,7 +929,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
index 1cbc864422..6d15da35b5 100644
--- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -72,7 +73,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.14407634735107422 [0m
+[1;32mDEBUG: model prefixing  takes 0.12750768661499023 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -87,26 +88,17 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.830 s
+1 processes with 72 diagrams generated in 3.691 s
 Total: 1 processes with 72 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_smeft_gg_tttt 
 INFO: remove old information in CODEGEN_mad_smeft_gg_tttt 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards [0m
@@ -115,18 +107,18 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ t t~ @1 
 INFO: Creating files in directory P1_gg_ttxttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.194 s
-Wrote files for 119 helas calls in 0.403 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 70 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [1;30m[model_handling.py at line 1552][0m [0m
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.186 s
+Wrote files for 119 helas calls in 0.384 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
@@ -134,14 +126,14 @@ ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.328 s
+ALOHA: aloha creates 5 routines in  0.318 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 10 routines in  0.343 s
+ALOHA: aloha creates 10 routines in  0.332 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -151,14 +143,12 @@ ALOHA: aloha creates 10 routines in  0.343 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV9
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV10
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
 INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc
 INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -177,17 +167,17 @@ Hunk #2 succeeded at 280 (offset 8 lines).
 Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #2 succeeded at 275 (offset 61 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m7.380s
-user	0m7.061s
-sys	0m0.302s
-Code generation completed in 8 seconds
+real	0m7.131s
+user	0m6.830s
+sys	0m0.285s
+Code generation completed in 7 seconds
 ************************************************************
 *                                                          *
 *                      W E L C O M E to                    *
@@ -200,7 +190,7 @@ Code generation completed in 8 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -230,7 +220,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
index 3499d26115..7e4394e2dd 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -72,7 +73,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.14396023750305176 [0m
+[1;32mDEBUG: model prefixing  takes 0.1275796890258789 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -87,38 +88,33 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.828 s
+1 processes with 72 diagrams generated in 3.713 s
 Total: 1 processes with 72 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ t t~ @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. 
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.192 s
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.327 s
+ALOHA: aloha creates 5 routines in  0.320 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -128,19 +124,17 @@ ALOHA: aloha creates 5 routines in  0.327 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV9
 <class 'aloha.create_aloha.AbstractRoutine'> VVVV10
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h
 INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc
 INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
 quit
 
-real	0m5.681s
-user	0m5.176s
-sys	0m0.063s
-Code generation completed in 6 seconds
+real	0m5.211s
+user	0m5.033s
+sys	0m0.067s
+Code generation completed in 5 seconds
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
index f16a211807..90e13a925d 100644
--- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -549,26 +550,17 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.127 s
+1 processes with 6 diagrams generated in 0.123 s
 Total: 1 processes with 6 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_susy_gg_t1t1 
 INFO: remove old information in CODEGEN_mad_susy_gg_t1t1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards [0m
@@ -577,42 +569,40 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t1 t1~ @1 
 INFO: Creating files in directory P1_gg_t1t1x 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s
-Wrote files for 16 helas calls in 0.084 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 5 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [1;30m[model_handling.py at line 1552][0m [0m
+Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s
+Wrote files for 16 helas calls in 0.081 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.188 s
+ALOHA: aloha creates 3 routines in  0.191 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 6 routines in  0.186 s
+ALOHA: aloha creates 6 routines in  0.181 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VVSS1
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc
 INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -631,16 +621,16 @@ Hunk #2 succeeded at 280 (offset 8 lines).
 Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #2 succeeded at 215 (offset 1 line).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m3.170s
-user	0m2.747s
-sys	0m0.300s
+real	0m2.979s
+user	0m2.658s
+sys	0m0.321s
 Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
@@ -654,7 +644,7 @@ Code generation completed in 3 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -684,7 +674,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
index bf486a39f2..853e6fc8f7 100644
--- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
+++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -549,54 +550,47 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1  
 INFO: Process has 6 diagrams 
-1 processes with 6 diagrams generated in 0.127 s
+1 processes with 6 diagrams generated in 0.124 s
 Total: 1 processes with 6 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t1 t1~ @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. 
 Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates VSS1 routines[0m
 ALOHA: aloha creates VVSS1 routines[0m
-ALOHA: aloha creates 3 routines in  0.192 s
+ALOHA: aloha creates 3 routines in  0.182 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VSS1
 <class 'aloha.create_aloha.AbstractRoutine'> VVSS1
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc
 INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
 quit
 
-real	0m1.498s
-user	0m1.289s
-sys	0m0.074s
+real	0m1.331s
+user	0m1.253s
+sys	0m0.069s
 Code generation completed in 1 seconds
diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
index 1a997e0de0..cfa8b980ff 100644
--- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -549,26 +550,17 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.123 s
+1 processes with 3 diagrams generated in 0.124 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mAddition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  opt['output_options']['vector_size'] = [0m 32 [1;30m[export_v4.py at line 4334][0m [0m
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
 INFO: initialize a new directory: CODEGEN_mad_susy_gg_tt 
 INFO: remove old information in CODEGEN_mad_susy_gg_tt 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt 
 [1;34mWARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards [0m
@@ -577,23 +569,23 @@ INFO: Organizing processes into subprocess groups
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
-[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1229][0m [0m
+[1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1156][0m [0m
 INFO: Creating files in directory . 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
 INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Finding symmetric diagrams for subprocess group gg_ttx 
-[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1604][0m [0m
-[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1628][0m [0m
-[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1629][0m [0m
-Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
-Wrote files for 10 helas calls in 0.077 s
+[1;32mDEBUG:  len(subproc_diagrams_for_config) = [0m 3 [1;30m[model_handling.py at line 1527][0m [0m
+[1;32mDEBUG:  iconfig_to_diag = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1551][0m [0m
+[1;32mDEBUG:  diag_to_iconfig = [0m {1: 1, 2: 2, 3: 3} [1;30m[model_handling.py at line 1552][0m [0m
+Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s
+Wrote files for 10 helas calls in 0.075 s
 [1;32mDEBUG:  self.vector_size = [0m 32 [1;30m[export_v4.py at line 7023][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.140 s
+ALOHA: aloha creates 2 routines in  0.136 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
@@ -602,14 +594,12 @@ ALOHA: aloha creates 4 routines in  0.136 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc
 INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. 
 The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
@@ -628,16 +618,16 @@ Hunk #2 succeeded at 280 (offset 8 lines).
 Hunk #3 succeeded at 489 (offset 13 lines).
 patching file matrix1.f
 Hunk #2 succeeded at 227 (offset 13 lines).
-[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 258][0m [0m
+[1;32mDEBUG:  p.returncode = [0m 0 [1;30m[output.py at line 264][0m [0m
 Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt done.
 Type "launch" to generate events from this process, or see
 /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/README
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.967s
-user	0m2.605s
-sys	0m0.308s
+real	0m3.046s
+user	0m2.549s
+sys	0m0.290s
 Code generation completed in 3 seconds
 ************************************************************
 *                                                          *
@@ -651,7 +641,7 @@ Code generation completed in 3 seconds
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
@@ -681,7 +671,7 @@ launch in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect                            *
+*         VERSION 3.6.0                                    *
 *                                                          *
 *    The MadGraph5_aMC@NLO Development Team - Find us at   *
 *    https://server06.fynu.ucl.ac.be/projects/madgraph     *
diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
index 0f424b45da..8f97de9855 100644
--- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
+++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt
@@ -2,6 +2,7 @@
 This version is intended for development/beta testing and NOT for production.
 This version has not been fully tested (if at all) and might have limited user support (if at all)[0m
 Running MG5 in debug mode
+Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT
 ************************************************************
 *                                                          *
 *                     W E L C O M E to                     *
@@ -14,7 +15,7 @@ Running MG5 in debug mode
 *                   *        * *        *                  *
 *                 *                       *                *
 *                                                          *
-*         VERSION 3.6.0_lo_vect         2024-06-17         *
+*         VERSION 3.6.0                 2024-09-30         *
 [1;31m*                                                          *[1;0m
 [1;31m*          WARNING: UNKNOWN DEVELOPMENT VERSION.           *[1;0m
 [1;31m*            WARNING: DO NOT USE FOR PRODUCTION            *[1;0m
@@ -54,9 +55,6 @@ set stdout_level DEBUG
 set output information to level: 10
 set zerowidth_tchannel F
 import model MSSM_SLHA2
-INFO: load particles 
-INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.9416484832763672 [0m
 INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . 
 INFO: Detect SLHA2 format. keeping restricted parameter in the param_card 
 [1;32mDEBUG: Simplifying conditional expressions [0m
@@ -552,52 +550,45 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.112 s
+1 processes with 3 diagrams generated in 0.125 s
 Total: 1 processes with 3 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt
-[1;34merror detected in plugin: maddm.[0m
-[1;34mNo module named 'maddm_interface'[0m
-Load PLUGIN.CUDACPP_OUTPUT
-[1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
-It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 165][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 170][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 171][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 176][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 
 INFO: Processing color information for process: g g > t t~ @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 212][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 213][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 214][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 215][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 216][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 218][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 219][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 220][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 221][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 222][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. 
 Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.142 s
+ALOHA: aloha creates 2 routines in  0.145 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h
 INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/../SubProcesses/./HelAmps.cc
-INFO: Created file HelAmps.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/../SubProcesses/. 
 super_write_set_parameters_onlyfixMajorana (hardcoded=False)
 super_write_set_parameters_onlyfixMajorana (hardcoded=True)
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h
-FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h
+FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc
 INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory 
 INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
 quit
 
-real	0m2.450s
-user	0m2.347s
-sys	0m0.065s
+real	0m1.357s
+user	0m1.261s
+sys	0m0.066s
 Code generation completed in 2 seconds